diff --git a/doc/src/accelerate_kokkos.txt b/doc/src/accelerate_kokkos.txt
index 1a45c04a1b5b1ae469d3f0ed620b90366fb42c9a..3d31344c247a73f6dc2405545e1014e63b628d16 100644
--- a/doc/src/accelerate_kokkos.txt
+++ b/doc/src/accelerate_kokkos.txt
@@ -110,14 +110,14 @@ mpirun -np 96 -ppn 12 lmp_g++ -k on t 20 -sf kk -in in.lj   # ditto on 8 Phis :p
 [Required hardware/software:]
 
 Kokkos support within LAMMPS must be built with a C++11 compatible
-compiler.  If using gcc, version 4.8.1 or later is required.
+compiler.  If using gcc, version 4.7.2 or later is required.
 
 To build with Kokkos support for CPUs, your compiler must support the
 OpenMP interface.  You should have one or more multi-core CPUs so that
 multiple threads can be launched by each MPI task running on a CPU.
 
 To build with Kokkos support for NVIDIA GPUs, NVIDIA Cuda software
-version 6.5 or later must be installed on your system.  See the
+version 7.5 or later must be installed on your system.  See the
 discussion for the "GPU"_accelerate_gpu.html package for details of
 how to check and do this.
 
diff --git a/lib/kokkos/.gitignore b/lib/kokkos/.gitignore
deleted file mode 100644
index f9d16be1558495fb95e3f5c4b785eefd3b3aa854..0000000000000000000000000000000000000000
--- a/lib/kokkos/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# Standard ignores
-*~
-*.pyc
-\#*#
-.#*
-.*.swp
-.cproject
-.project
diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md
new file mode 100644
index 0000000000000000000000000000000000000000..a444f08eed9a9b3f7dc376435f73d5334a00d3ee
--- /dev/null
+++ b/lib/kokkos/CHANGELOG.md
@@ -0,0 +1,284 @@
+# Change Log
+
+## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07)
+
+**Implemented enhancements:**
+
+- Add CMake option to enable Cuda Lambda support [\#589](https://github.com/kokkos/kokkos/issues/589)
+- Add CMake option to enable Cuda RDC support [\#588](https://github.com/kokkos/kokkos/issues/588)
+- Add Initial Intel Sky Lake Xeon-HPC Compiler Support to Kokkos Make System [\#584](https://github.com/kokkos/kokkos/issues/584)
+- Building Tutorial Examples  [\#582](https://github.com/kokkos/kokkos/issues/582)
+- Internal way for using ThreadVectorRange without TeamHandle  [\#574](https://github.com/kokkos/kokkos/issues/574)
+- Testing: Add testing for uvm and rdc [\#571](https://github.com/kokkos/kokkos/issues/571)
+- Profiling: Add Memory Tracing and Region Markers [\#557](https://github.com/kokkos/kokkos/issues/557)
+- nvcc\_wrapper not installed with Kokkos built with CUDA through CMake [\#543](https://github.com/kokkos/kokkos/issues/543)
+- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541)
+- Benchmarks: Add Gather benchmark [\#536](https://github.com/kokkos/kokkos/issues/536)
+- Testing: add spot\_check option to test\_all\_sandia [\#535](https://github.com/kokkos/kokkos/issues/535)
+- Deprecate Kokkos::Impl::VerifyExecutionCanAccessMemorySpace [\#527](https://github.com/kokkos/kokkos/issues/527)
+- Add AtomicAdd support for 64bit float for Pascal [\#522](https://github.com/kokkos/kokkos/issues/522)
+- Add Restrict and Aligned memory trait [\#517](https://github.com/kokkos/kokkos/issues/517)
+- Kokkos Tests are Not Run using Compiler Optimization [\#501](https://github.com/kokkos/kokkos/issues/501)
+- Add support for clang 3.7 w/ openmp backend [\#393](https://github.com/kokkos/kokkos/issues/393)
+- Provide an error throw class [\#79](https://github.com/kokkos/kokkos/issues/79)
+
+**Fixed bugs:**
+
+- Cuda UVM Allocation test broken with UVM as default space [\#586](https://github.com/kokkos/kokkos/issues/586)
+- Bug \(develop branch only\): multiple tests are now failing when forcing uvm usage. [\#570](https://github.com/kokkos/kokkos/issues/570)
+- Error in generate\_makefile.sh for Kokkos when Compiler is Empty String/Fails [\#568](https://github.com/kokkos/kokkos/issues/568)
+- XL 13.1.4 incorrect C++11 flag [\#553](https://github.com/kokkos/kokkos/issues/553)
+- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541)
+- Installing Library on MAC broken due to cp -u [\#539](https://github.com/kokkos/kokkos/issues/539)
+- Intel Nightly Testing with Debug enabled fails [\#534](https://github.com/kokkos/kokkos/issues/534)
+
+## [2.02.01](https://github.com/kokkos/kokkos/tree/2.02.01) (2016-11-01)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.00...2.02.01)
+
+**Implemented enhancements:**
+
+- Add Changelog generation to our process. [\#506](https://github.com/kokkos/kokkos/issues/506)
+
+**Fixed bugs:**
+
+- Test scratch\_request fails in Serial with Debug enabled [\#520](https://github.com/kokkos/kokkos/issues/520)
+- Bug In BoundsCheck for DynRankView [\#516](https://github.com/kokkos/kokkos/issues/516)
+
+## [2.02.00](https://github.com/kokkos/kokkos/tree/2.02.00) (2016-10-30)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.10...2.02.00)
+
+**Implemented enhancements:**
+
+- Add PowerPC assembly for grabbing clock register in memory pool [\#511](https://github.com/kokkos/kokkos/issues/511)
+- Add GCC 6.x support [\#508](https://github.com/kokkos/kokkos/issues/508)
+- Test install and build against installed library [\#498](https://github.com/kokkos/kokkos/issues/498)
+- Makefile.kokkos adds expt-extended-lambda to cuda build with clang [\#490](https://github.com/kokkos/kokkos/issues/490)
+- Add top-level makefile option to just test kokkos-core unit-test [\#485](https://github.com/kokkos/kokkos/issues/485)
+- Split and harmonize Object Files of Core UnitTests to increase build parallelism [\#484](https://github.com/kokkos/kokkos/issues/484)
+- LayoutLeft to LayoutLeft subview for 3D and 4D views [\#473](https://github.com/kokkos/kokkos/issues/473)
+- Add official Cuda 8.0 support [\#468](https://github.com/kokkos/kokkos/issues/468)
+- Allow C++1Z Flag for Class Lambda capture [\#465](https://github.com/kokkos/kokkos/issues/465)
+- Add Clang 4.0+ compilation of Cuda code [\#455](https://github.com/kokkos/kokkos/issues/455)
+- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445)
+- Add name of view to "View bounds error" [\#432](https://github.com/kokkos/kokkos/issues/432)
+- Move Sort Binning Operators into Kokkos namespace [\#421](https://github.com/kokkos/kokkos/issues/421)
+- TaskPolicy - generate error when attempt to use uninitialized  [\#396](https://github.com/kokkos/kokkos/issues/396)
+- Import WithoutInitializing and AllowPadding into Kokkos namespace [\#325](https://github.com/kokkos/kokkos/issues/325)
+- TeamThreadRange requires begin, end to be the same type [\#305](https://github.com/kokkos/kokkos/issues/305)
+- CudaUVMSpace should track \# allocations, due to CUDA limit on \# UVM allocations [\#300](https://github.com/kokkos/kokkos/issues/300)
+- Remove old View and its infrastructure [\#259](https://github.com/kokkos/kokkos/issues/259)
+
+**Fixed bugs:**
+
+- Bug in TestCuda\_Other.cpp: most likely assembly inserted into Device code [\#515](https://github.com/kokkos/kokkos/issues/515)
+- Cuda Compute Capability check of GPU is outdated [\#509](https://github.com/kokkos/kokkos/issues/509)
+- multi\_scratch test with hwloc and pthreads seg-faults.  [\#504](https://github.com/kokkos/kokkos/issues/504)
+- generate\_makefile.bash: "make install" is broken [\#503](https://github.com/kokkos/kokkos/issues/503)
+- make clean in Out of Source Build/Tests Does Not Work Correctly [\#502](https://github.com/kokkos/kokkos/issues/502)
+- Makefiles for test and examples have issues in Cuda when CXX is not explicitly specified [\#497](https://github.com/kokkos/kokkos/issues/497)
+- Dispatch lambda test directly inside GTEST macro doesn't work with nvcc [\#491](https://github.com/kokkos/kokkos/issues/491)
+- UnitTests with HWLOC enabled fail if run with mpirun bound to a single core [\#489](https://github.com/kokkos/kokkos/issues/489)
+- Failing Reducer Test on Mac with Pthreads [\#479](https://github.com/kokkos/kokkos/issues/479)
+- make test Dumps Error with Clang Not Found [\#471](https://github.com/kokkos/kokkos/issues/471)
+- OpenMP TeamPolicy member broadcast not using correct volatile shared variable [\#424](https://github.com/kokkos/kokkos/issues/424)
+- TaskPolicy - generate error when attempt to use uninitialized  [\#396](https://github.com/kokkos/kokkos/issues/396)
+- New task policy implementation is pulling in old experimental code. [\#372](https://github.com/kokkos/kokkos/issues/372)
+- MemoryPool unit test hangs on Power8 with GCC 6.1.0 [\#298](https://github.com/kokkos/kokkos/issues/298)
+
+## [2.01.10](https://github.com/kokkos/kokkos/tree/2.01.10) (2016-09-27)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.06...2.01.10)
+
+**Implemented enhancements:**
+
+- Enable Profiling by default in Tribits build [\#438](https://github.com/kokkos/kokkos/issues/438)
+- parallel\_reduce\(0\), parallel\_scan\(0\) unit tests [\#436](https://github.com/kokkos/kokkos/issues/436)
+- data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351)
+- Fix tutorials to track new Kokkos::View [\#323](https://github.com/kokkos/kokkos/issues/323)
+- Rename team policy set\_scratch\_size. [\#195](https://github.com/kokkos/kokkos/issues/195)
+
+**Fixed bugs:**
+
+- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445)
+- Makefile spits syntax error [\#435](https://github.com/kokkos/kokkos/issues/435)
+- Kokkos::sort fails for view with all the same values [\#422](https://github.com/kokkos/kokkos/issues/422)
+- Generic Reducers: can't accept inline constructed reducer [\#404](https://github.com/kokkos/kokkos/issues/404)
+- data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351)
+- const subview of const view with compile time dimensions on Cuda backend [\#310](https://github.com/kokkos/kokkos/issues/310)
+- Kokkos \(in Trilinos\) Causes Internal Compiler Error on CUDA 8.0.21-EA on POWER8 [\#307](https://github.com/kokkos/kokkos/issues/307)
+- Core Oversubscription Detection Broken? [\#159](https://github.com/kokkos/kokkos/issues/159)
+
+
+## [2.01.06](https://github.com/kokkos/kokkos/tree/2.01.06) (2016-09-02)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.00...2.01.06)
+
+**Implemented enhancements:**
+
+- Add "standard" reducers for lambda-supportable customized reduce [\#411](https://github.com/kokkos/kokkos/issues/411)
+- TaskPolicy - single thread back-end execution [\#390](https://github.com/kokkos/kokkos/issues/390)
+- Kokkos master clone tag [\#387](https://github.com/kokkos/kokkos/issues/387)
+- Query memory requirements from task policy [\#378](https://github.com/kokkos/kokkos/issues/378)
+- Output order of test\_atomic.cpp is confusing [\#373](https://github.com/kokkos/kokkos/issues/373)
+- Missing testing for atomics [\#341](https://github.com/kokkos/kokkos/issues/341)
+- Feature request for Kokkos to provide Kokkos::atomic\_fetch\_max and atomic\_fetch\_min [\#336](https://github.com/kokkos/kokkos/issues/336)
+- TaskPolicy\<Cuda\> performance requires teams mapped to warps [\#218](https://github.com/kokkos/kokkos/issues/218)
+
+**Fixed bugs:**
+
+- Reduce with Teams broken for custom initialize [\#407](https://github.com/kokkos/kokkos/issues/407)
+- Failing Kokkos build on Debian [\#402](https://github.com/kokkos/kokkos/issues/402)
+- Failing Tests on NVIDIA Pascal GPUs [\#398](https://github.com/kokkos/kokkos/issues/398)
+- Algorithms: fill\_random assumes dimensions fit in unsigned int [\#389](https://github.com/kokkos/kokkos/issues/389)
+- Kokkos::subview with RandomAccess Memory Trait [\#385](https://github.com/kokkos/kokkos/issues/385)
+- Build warning \(signed / unsigned comparison\) in Cuda implementation [\#365](https://github.com/kokkos/kokkos/issues/365)
+- wrong results for a parallel\_reduce with CUDA8 / Maxwell50 [\#352](https://github.com/kokkos/kokkos/issues/352)
+- Hierarchical parallelism - 3 level unit test [\#344](https://github.com/kokkos/kokkos/issues/344)
+- Can I allocate a View w/ both WithoutInitializing & AllowPadding? [\#324](https://github.com/kokkos/kokkos/issues/324)
+- subview View layout determination [\#309](https://github.com/kokkos/kokkos/issues/309)
+- Unit tests with Cuda - Maxwell [\#196](https://github.com/kokkos/kokkos/issues/196)
+
+## [2.01.00](https://github.com/kokkos/kokkos/tree/2.01.00) (2016-07-21)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/End_C++98...2.01.00)
+
+**Implemented enhancements:**
+
+- Edit ViewMapping so assigning Views with the same custom layout compiles when const casting [\#327](https://github.com/kokkos/kokkos/issues/327)
+- DynRankView: Performance improvement for operator\(\) [\#321](https://github.com/kokkos/kokkos/issues/321)
+- Interoperability between static and dynamic rank views [\#295](https://github.com/kokkos/kokkos/issues/295)
+- subview member function ? [\#280](https://github.com/kokkos/kokkos/issues/280)
+- Interoperability between View and DynRankView. [\#245](https://github.com/kokkos/kokkos/issues/245)
+- \(Trilinos\) build warning in atomic\_assign, with Kokkos::complex [\#177](https://github.com/kokkos/kokkos/issues/177)
+- View\<\>::shmem\_size should runtime check for number of arguments equal to rank [\#176](https://github.com/kokkos/kokkos/issues/176)
+- Custom reduction join via lambda argument [\#99](https://github.com/kokkos/kokkos/issues/99)
+- DynRankView with 0 dimensions passed in at construction [\#293](https://github.com/kokkos/kokkos/issues/293)
+- Inject view\_alloc and friends into Kokkos namespace [\#292](https://github.com/kokkos/kokkos/issues/292)
+- Less restrictive TeamPolicy reduction on Cuda [\#286](https://github.com/kokkos/kokkos/issues/286)
+- deep\_copy using remap with source execution space [\#267](https://github.com/kokkos/kokkos/issues/267)
+- Suggestion:  Enable opt-in L1 caching via nvcc-wrapper [\#261](https://github.com/kokkos/kokkos/issues/261)
+- More flexible create\_mirror functions [\#260](https://github.com/kokkos/kokkos/issues/260)
+- Rename View::memory\_span to View::required\_allocation\_size [\#256](https://github.com/kokkos/kokkos/issues/256)
+- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237)
+- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237)
+- Kokkos::Timer [\#234](https://github.com/kokkos/kokkos/issues/234)
+- Fence CudaUVMSpace allocations [\#230](https://github.com/kokkos/kokkos/issues/230)
+- View::operator\(\) accept std::is\_integral and std::is\_enum [\#227](https://github.com/kokkos/kokkos/issues/227)
+- Allocating zero size View [\#216](https://github.com/kokkos/kokkos/issues/216)
+- Thread scalable memory pool [\#212](https://github.com/kokkos/kokkos/issues/212)
+- Add a way to disable memory leak output [\#194](https://github.com/kokkos/kokkos/issues/194)
+- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192)
+- Runtime rank wrapper for View [\#189](https://github.com/kokkos/kokkos/issues/189)
+- Profiling Interface [\#158](https://github.com/kokkos/kokkos/issues/158)
+- Fix View assignment \(of managed to unmanaged\) [\#153](https://github.com/kokkos/kokkos/issues/153)
+- Add unit test for assignment of managed View to unmanaged View [\#152](https://github.com/kokkos/kokkos/issues/152)
+- Check for oversubscription of threads with MPI in Kokkos::initialize [\#149](https://github.com/kokkos/kokkos/issues/149)
+- Dynamic resizable 1-dimensional view [\#143](https://github.com/kokkos/kokkos/issues/143)
+- Develop TaskPolicy for CUDA [\#142](https://github.com/kokkos/kokkos/issues/142)
+- New View : Test Compilation Downstream [\#138](https://github.com/kokkos/kokkos/issues/138)
+- New View Implementation [\#135](https://github.com/kokkos/kokkos/issues/135)
+- Add variant of subview that lets users add traits [\#134](https://github.com/kokkos/kokkos/issues/134)
+- NVCC-WRAPPER: Add --host-only flag [\#121](https://github.com/kokkos/kokkos/issues/121)
+- Address gtest issue with TriBITS Kokkos build outside of Trilinos [\#117](https://github.com/kokkos/kokkos/issues/117)
+- Make tests pass with -expt-extended-lambda on CUDA [\#108](https://github.com/kokkos/kokkos/issues/108)
+- Dynamic scheduling for parallel\_for and parallel\_reduce [\#106](https://github.com/kokkos/kokkos/issues/106)
+- Runtime or compile time error when reduce functor's join is not properly specified as const member function or with volatile arguments [\#105](https://github.com/kokkos/kokkos/issues/105)
+- Error out when the number of threads is modified after kokkos is initialized [\#104](https://github.com/kokkos/kokkos/issues/104)
+- Porting to POWER and remove assumption of X86 default [\#103](https://github.com/kokkos/kokkos/issues/103)
+- Dynamic scheduling option for RangePolicy [\#100](https://github.com/kokkos/kokkos/issues/100)
+- SharedMemory Support for Lambdas [\#81](https://github.com/kokkos/kokkos/issues/81)
+- Recommended TeamSize for Lambdas [\#80](https://github.com/kokkos/kokkos/issues/80)
+- Add Aggressive Vectorization Compilation mode [\#72](https://github.com/kokkos/kokkos/issues/72)
+- Dynamic scheduling team execution policy [\#53](https://github.com/kokkos/kokkos/issues/53)
+- UVM allocations in multi-GPU systems [\#50](https://github.com/kokkos/kokkos/issues/50)
+- Synchronic in Kokkos::Impl [\#44](https://github.com/kokkos/kokkos/issues/44)
+- index and dimension types in for loops [\#28](https://github.com/kokkos/kokkos/issues/28)
+- Subview assign of 1D Strided with stride 1 to LayoutLeft/Right [\#1](https://github.com/kokkos/kokkos/issues/1)
+
+**Fixed bugs:**
+
+- misspelled variable name in Kokkos\_Atomic\_Fetch + missing unit tests [\#340](https://github.com/kokkos/kokkos/issues/340)
+- seg fault Kokkos::Impl::CudaInternal::print\_configuration [\#338](https://github.com/kokkos/kokkos/issues/338)
+- Clang compiler error with named parallel\_reduce, tags, and TeamPolicy. [\#335](https://github.com/kokkos/kokkos/issues/335)
+- Shared Memory Allocation Error at parallel\_reduce [\#311](https://github.com/kokkos/kokkos/issues/311)
+- DynRankView: Fix resize and realloc [\#303](https://github.com/kokkos/kokkos/issues/303)
+- Scratch memory and dynamic scheduling [\#279](https://github.com/kokkos/kokkos/issues/279)
+- MemoryPool infinite loop when out of memory [\#312](https://github.com/kokkos/kokkos/issues/312)
+- Kokkos DynRankView changes break Sacado and Panzer [\#299](https://github.com/kokkos/kokkos/issues/299)
+- MemoryPool fails to compile on non-cuda non-x86 [\#297](https://github.com/kokkos/kokkos/issues/297)
+- Random Number Generator Fix [\#296](https://github.com/kokkos/kokkos/issues/296)
+- View template parameter ordering Bug [\#282](https://github.com/kokkos/kokkos/issues/282)
+- Serial task policy broken. [\#281](https://github.com/kokkos/kokkos/issues/281)
+- deep\_copy with LayoutStride should not memcpy [\#262](https://github.com/kokkos/kokkos/issues/262)
+- DualView::need\_sync should be a const method [\#248](https://github.com/kokkos/kokkos/issues/248)
+- Arbitrary-sized atomics on GPUs broken; loop forever [\#238](https://github.com/kokkos/kokkos/issues/238)
+- boolean reduction value\_type changes answer [\#225](https://github.com/kokkos/kokkos/issues/225)
+- Custom init\(\) function for parallel\_reduce with array value\_type [\#210](https://github.com/kokkos/kokkos/issues/210)
+- unit\_test Makefile is Broken - Recursively Calls itself until Machine Apocalypse. [\#202](https://github.com/kokkos/kokkos/issues/202)
+- nvcc\_wrapper Does Not Support  -Xcompiler \<compiler option\> [\#198](https://github.com/kokkos/kokkos/issues/198)
+- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192)
+- Kokkos Threads Backend impl\_shared\_alloc Broken on Intel 16.1 \(Shepard Haswell\) [\#186](https://github.com/kokkos/kokkos/issues/186)
+- pthread back end hangs if used uninitialized [\#182](https://github.com/kokkos/kokkos/issues/182)
+- parallel\_reduce of size 0, not calling init/join [\#175](https://github.com/kokkos/kokkos/issues/175)
+- Bug in Threads with OpenMP enabled [\#173](https://github.com/kokkos/kokkos/issues/173)
+- KokkosExp\_SharedAlloc, m\_team\_work\_index inaccessible [\#166](https://github.com/kokkos/kokkos/issues/166)
+- 128-bit CAS without Assembly Broken? [\#161](https://github.com/kokkos/kokkos/issues/161)
+- fatal error: Cuda/Kokkos\_Cuda\_abort.hpp: No such file or directory [\#157](https://github.com/kokkos/kokkos/issues/157)
+- Power8: Fix OpenMP backend [\#139](https://github.com/kokkos/kokkos/issues/139)
+- Data race in Kokkos OpenMP initialization [\#131](https://github.com/kokkos/kokkos/issues/131)
+- parallel\_launch\_local\_memory and cuda 7.5 [\#125](https://github.com/kokkos/kokkos/issues/125)
+- Resize can fail with Cuda due to asynchronous dispatch [\#119](https://github.com/kokkos/kokkos/issues/119)
+- Qthread taskpolicy initialization bug. [\#92](https://github.com/kokkos/kokkos/issues/92)
+- Windows: sys/mman.h [\#89](https://github.com/kokkos/kokkos/issues/89)
+- Windows: atomic\_fetch\_sub\(\) [\#88](https://github.com/kokkos/kokkos/issues/88)
+- Windows: snprintf [\#87](https://github.com/kokkos/kokkos/issues/87)
+- Parallel\_Reduce with TeamPolicy and league size of 0 returns garbage [\#85](https://github.com/kokkos/kokkos/issues/85)
+- Throw with Cuda when using \(2D\) team\_policy parallel\_reduce with less than a warp size [\#76](https://github.com/kokkos/kokkos/issues/76)
+- Scalar views don't work with Kokkos::Atomic memory trait [\#69](https://github.com/kokkos/kokkos/issues/69)
+- Reduce the number of threads per team for Cuda [\#63](https://github.com/kokkos/kokkos/issues/63)
+- Named Kernels fail for reductions with CUDA [\#60](https://github.com/kokkos/kokkos/issues/60)
+- Kokkos View dimension\_\(\) for long returning unsigned int [\#20](https://github.com/kokkos/kokkos/issues/20)
+- atomic test hangs with LLVM [\#6](https://github.com/kokkos/kokkos/issues/6)
+- OpenMP Test should set omp\_set\_num\_threads to 1 [\#4](https://github.com/kokkos/kokkos/issues/4)
+
+**Closed issues:**
+
+- develop branch broken with CUDA 8 and --expt-extended-lambda  [\#354](https://github.com/kokkos/kokkos/issues/354)
+- --arch=KNL with Intel 2016 build failure [\#349](https://github.com/kokkos/kokkos/issues/349)
+- Error building with Cuda when passing -DKOKKOS\_CUDA\_USE\_LAMBDA to generate\_makefile.bash [\#343](https://github.com/kokkos/kokkos/issues/343)
+- Can I safely use int indices in a 2-D View with capacity \> 2B? [\#318](https://github.com/kokkos/kokkos/issues/318)
+- Kokkos::ViewAllocateWithoutInitializing is not working [\#317](https://github.com/kokkos/kokkos/issues/317)
+- Intel build on Mac OS X [\#277](https://github.com/kokkos/kokkos/issues/277)
+- deleted [\#271](https://github.com/kokkos/kokkos/issues/271)
+- Broken Mira build [\#268](https://github.com/kokkos/kokkos/issues/268)
+- 32-bit build [\#246](https://github.com/kokkos/kokkos/issues/246)
+- parallel\_reduce with RDC crashes linker [\#232](https://github.com/kokkos/kokkos/issues/232)
+- build of Kokkos\_Sparse\_MV\_impl\_spmv\_Serial.cpp.o fails if you use nvcc and have cuda disabled [\#209](https://github.com/kokkos/kokkos/issues/209)
+- Kokkos Serial execution space is not tested with TeamPolicy. [\#207](https://github.com/kokkos/kokkos/issues/207)
+- Unit test failure on Hansen  KokkosCore\_UnitTest\_Cuda\_MPI\_1 [\#200](https://github.com/kokkos/kokkos/issues/200)
+- nvcc compiler warning: calling a \_\_host\_\_ function from a \_\_host\_\_ \_\_device\_\_ function is not allowed [\#180](https://github.com/kokkos/kokkos/issues/180)
+- Intel 15 build error with defaulted "move" operators [\#171](https://github.com/kokkos/kokkos/issues/171)
+- missing libkokkos.a during Trilinos 12.4.2 build, yet other libkokkos\*.a libs are there [\#165](https://github.com/kokkos/kokkos/issues/165)
+- Tie atomic updates to execution space or even to thread team? \(speculation\) [\#144](https://github.com/kokkos/kokkos/issues/144)
+- New View: Compiletime/size Test [\#137](https://github.com/kokkos/kokkos/issues/137)
+- New View : Performance Test [\#136](https://github.com/kokkos/kokkos/issues/136)
+- Signed/unsigned  comparison warning in CUDA parallel [\#130](https://github.com/kokkos/kokkos/issues/130)
+- Kokkos::complex: Need op\* w/ std::complex & real [\#126](https://github.com/kokkos/kokkos/issues/126)
+- Use uintptr\_t for casting pointers [\#110](https://github.com/kokkos/kokkos/issues/110)
+- Default thread mapping behavior between P and Q threads. [\#91](https://github.com/kokkos/kokkos/issues/91)
+- Windows: Atomic\_Fetch\_Exchange\(\) return type [\#90](https://github.com/kokkos/kokkos/issues/90)
+- Synchronic unit test is way too long [\#84](https://github.com/kokkos/kokkos/issues/84)
+- nvcc\_wrapper -\> $\(NVCC\_WRAPPER\) [\#42](https://github.com/kokkos/kokkos/issues/42)
+- Check compiler version and print helpful message [\#39](https://github.com/kokkos/kokkos/issues/39)
+- Kokkos shared memory on Cuda uses a lot of registers [\#31](https://github.com/kokkos/kokkos/issues/31)
+- Can not pass unit test `cuda.space` without a GT 720 [\#25](https://github.com/kokkos/kokkos/issues/25)
+- Makefile.kokkos lacks bounds checking option that CMake has [\#24](https://github.com/kokkos/kokkos/issues/24)
+- Kokkos can not complete unit tests with CUDA UVM enabled [\#23](https://github.com/kokkos/kokkos/issues/23)
+- Simplify teams + shared memory histogram example to remove vectorization [\#21](https://github.com/kokkos/kokkos/issues/21)
+- Kokkos needs to refer to ${PROJECT\_NAME}\_ENABLE\_CXX11 not Trilinos\_ENABLE\_CXX11 [\#17](https://github.com/kokkos/kokkos/issues/17)
+- Kokkos Base Makefile adds AVX to KNC Build [\#16](https://github.com/kokkos/kokkos/issues/16)
+- MS Visual Studio 2013 Build Errors [\#9](https://github.com/kokkos/kokkos/issues/9)
+- subview\(X, ALL\(\), j\) for 2-D LayoutRight View X: should it view a column? [\#5](https://github.com/kokkos/kokkos/issues/5)
+
+## [End_C++98](https://github.com/kokkos/kokkos/tree/End_C++98) (2015-04-15)
+
+
+\* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)*
diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt
index 1219352f73dc47360555639b1f4c3ddde410e9a5..2b2b9be6aa3b9bc7ae04a3c462a9b5ef7c986543 100644
--- a/lib/kokkos/CMakeLists.txt
+++ b/lib/kokkos/CMakeLists.txt
@@ -34,8 +34,8 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)
 # for compatibility with Kokkos' Makefile build system.
 
 TRIBITS_ADD_OPTION_AND_DEFINE(
-  ${PACKAGE_NAME}_ENABLE_DEBUG
-  ${PACKAGE_NAME_UC}_HAVE_DEBUG
+  Kokkos_ENABLE_DEBUG
+  KOKKOS_HAVE_DEBUG
   "Enable run-time debug checks.  These checks may be expensive, so they are disabled by default in a release build."
   ${${PROJECT_NAME}_ENABLE_DEBUG}
 )
@@ -57,7 +57,21 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
 TRIBITS_ADD_OPTION_AND_DEFINE(
   Kokkos_ENABLE_Cuda_UVM
   KOKKOS_USE_CUDA_UVM
-  "Enable CUDA Unified Virtual Memory support in Kokkos."
+  "Enable CUDA Unified Virtual Memory as the default in Kokkos."
+  OFF
+  )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+  Kokkos_ENABLE_Cuda_RDC
+  KOKKOS_HAVE_CUDA_RDC
+  "Enable CUDA Relocatable Device Code support in Kokkos."
+  OFF
+  )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+  Kokkos_ENABLE_Cuda_Lambda
+  KOKKOS_HAVE_CUDA_LAMBDA
+  "Enable CUDA LAMBDA support in Kokkos."
   OFF
   )
 
@@ -72,6 +86,9 @@ ASSERT_DEFINED(TPL_ENABLE_Pthread)
 IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread)
   MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF).  This is not allowed.  Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.")
 ENDIF ()
+IF (NOT TPL_ENABLE_Pthread)
+  ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0)
+ENDIF()
 
 TRIBITS_ADD_OPTION_AND_DEFINE(
   Kokkos_ENABLE_OpenMP
@@ -162,13 +179,28 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
 
 #------------------------------------------------------------------------------
 #
-# C) Process the subpackages for Kokkos
+# C) Install Kokkos' executable scripts
+#
+
+
+# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler.
+# Kokkos needs nvcc_wrapper in order to build.  Other libraries and
+# executables also need nvcc_wrapper.  Thus, we need to install it.
+# If the argument of DESTINATION is a relative path, CMake computes it
+# as relative to ${CMAKE_INSTALL_PREFIX}.
+
+INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin)
+
+
+#------------------------------------------------------------------------------
+#
+# D) Process the subpackages for Kokkos
 #
 
 TRIBITS_PROCESS_SUBPACKAGES()
 
 #
-# D) If Kokkos itself is enabled, process the Kokkos package
+# E) If Kokkos itself is enabled, process the Kokkos package
 #
 
 TRIBITS_PACKAGE_DEF()
diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos
index 73a332ee1110a2bff148f08ca3d9cc758ecabff3..038c252cf034654abcc5a6b100f6f99bd46663ee 100644
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@@ -7,25 +7,26 @@ CXXFLAGS=$(CCFLAGS)
 #Options: OpenMP,Serial,Pthreads,Cuda
 KOKKOS_DEVICES ?= "OpenMP"
 #KOKKOS_DEVICES ?= "Pthreads"
-#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv8,BGQ,Power7,Power8,KNL,BDW
+#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,KNL,BDW,SKX
 KOKKOS_ARCH ?= ""
 #Options: yes,no
 KOKKOS_DEBUG ?= "no"
 #Options: hwloc,librt,experimental_memkind
 KOKKOS_USE_TPLS ?= ""
-#Options: c++11
+#Options: c++11,c++1z
 KOKKOS_CXX_STANDARD ?= "c++11"
 #Options: aggressive_vectorization,disable_profiling
 KOKKOS_OPTIONS ?= ""
 
 #Default settings specific options
 #Options: force_uvm,use_ldg,rdc,enable_lambda
-KOKKOS_CUDA_OPTIONS ?= ""
+KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
 
 # Check for general settings
 
 KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
 KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
+KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l))
 
 # Check for external libraries
 KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
@@ -53,23 +54,71 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
 endif
 endif
 
+# Check for other Execution Spaces
+
+KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
+
+ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+  KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
+  CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
+  KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .)
+endif
+
+# Check OS
+
+KOKKOS_OS                      := $(shell uname -s)
+KOKKOS_INTERNAL_OS_CYGWIN      := $(shell uname -s | grep CYGWIN | wc -l)
+KOKKOS_INTERNAL_OS_LINUX       := $(shell uname -s | grep Linux  | wc -l)
+KOKKOS_INTERNAL_OS_DARWIN      := $(shell uname -s | grep Darwin | wc -l)
+
+# Check compiler
+
 KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version        2>&1 | grep "Intel Corporation" | wc -l)
 KOKKOS_INTERNAL_COMPILER_PGI   := $(shell $(CXX) --version        2>&1 | grep PGI   | wc -l)
 KOKKOS_INTERNAL_COMPILER_XL    := $(shell $(CXX) -qversion        2>&1 | grep XL    | wc -l)
 KOKKOS_INTERNAL_COMPILER_CRAY  := $(shell $(CXX) -craype-verbose  2>&1 | grep "CC-" | wc -l)
-KOKKOS_INTERNAL_OS_CYGWIN      := $(shell uname | grep CYGWIN | wc -l)
+KOKKOS_INTERNAL_COMPILER_NVCC  := $(shell $(CXX) --version        2>&1 | grep "nvcc" | wc -l)
+ifneq ($(OMPI_CXX),)
+  KOKKOS_INTERNAL_COMPILER_NVCC  := $(shell $(OMPI_CXX) --version   2>&1 | grep "nvcc" | wc -l)
+endif
+ifneq ($(MPICH_CXX),)
+  KOKKOS_INTERNAL_COMPILER_NVCC  := $(shell $(MPICH_CXX) --version  2>&1 | grep "nvcc" | wc -l)
+endif
+KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version        2>&1 | grep "clang" | wc -l)
+
+ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
+  KOKKOS_INTERNAL_COMPILER_CLANG = 1
+endif
+ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2)
+  KOKKOS_INTERNAL_COMPILER_XL = 1
+endif
+
+ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+  KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')
+  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
+      $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)    
+    endif
+    KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1
+  endif
+endif
+
 
 ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
   KOKKOS_INTERNAL_OPENMP_FLAG := -mp 
 else
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
-    KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+    KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
   else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-      # OpenMP is turned on by default in Cray compiler environment
-      KOKKOS_INTERNAL_OPENMP_FLAG :=
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
+      KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
     else
-      KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
+      ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+        # OpenMP is turned on by default in Cray compiler environment
+        KOKKOS_INTERNAL_OPENMP_FLAG :=
+      else
+        KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
+      endif
     endif
   endif
 endif
@@ -84,13 +133,11 @@ else
       KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
     else
       KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
+      KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
     endif
   endif
 endif
 
-# Check for other Execution Spaces
-KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
-
 # Check for Kokkos Architecture settings
 
 #Intel based
@@ -98,6 +145,7 @@ KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC |
 KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
 
 #NVIDIA based
@@ -110,11 +158,13 @@ KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep
 KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
+                                                      + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
@@ -127,13 +177,16 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_AR
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
+                                                      + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                                       + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
 endif
 
 #ARM based
-KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))
 
 #IBM based
 KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
@@ -145,17 +198,18 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
 KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
 
 #Any AVX?
-KOKKOS_INTERNAL_USE_ARCH_AVX       := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
-KOKKOS_INTERNAL_USE_ARCH_AVX2      := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
-KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX        := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX512MIC  := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
 
 # Decide what ISA level we are able to support
-KOKKOS_INTERNAL_USE_ISA_X86_64     := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
+KOKKOS_INTERNAL_USE_ISA_X86_64     := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
 KOKKOS_INTERNAL_USE_ISA_KNC        := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
 KOKKOS_INTERNAL_USE_ISA_POWERPCLE  := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))
 
 #Incompatible flags?
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
+KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
 KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
 
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
@@ -207,15 +261,21 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
+	tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
   	tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
+	tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
+	tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
   	tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
+	tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
+	tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
   	tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
+	tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
@@ -230,9 +290,15 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
 	tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
 endif
 
+ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
+        KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
+        tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
+        tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
+endif
+
 ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	KOKKOS_CXXFLAGS += -G
+ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+	KOKKOS_CXXFLAGS += -lineinfo
 endif
 	KOKKOS_CXXFLAGS += -g 
 	KOKKOS_LDFLAGS += -g -ldl
@@ -273,13 +339,14 @@ endif
 
 tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
 
+ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
 	tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
 endif
 
 ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
 	tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
+	tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
 endif
 
 ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
@@ -289,27 +356,101 @@ ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
 endif
 
 ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
-  tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += -expt-extended-lambda
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
+	tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
+	KOKKOS_CXXFLAGS += -expt-extended-lambda
+    else
+      $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
+    endif
+  endif
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+    tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
+  endif
+endif
 endif
 
 #Add Architecture flags
 
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+	KOKKOS_CXXFLAGS +=
+	KOKKOS_LDFLAGS +=
+    else
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+		KOKKOS_CXXFLAGS +=
+		KOKKOS_LDFLAGS +=
+	else
+		KOKKOS_CXXFLAGS += -march=armv8-a
+		KOKKOS_LDFLAGS += -march=armv8-a
+	endif
+    endif
+endif
+
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+	KOKKOS_CXXFLAGS +=
+	KOKKOS_LDFLAGS +=
+    else
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+		KOKKOS_CXXFLAGS +=
+		KOKKOS_LDFLAGS +=
+	else
+		KOKKOS_CXXFLAGS += -march=armv8.1-a
+		KOKKOS_LDFLAGS += -march=armv8.1-a
+	endif
+    endif
+endif
+
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
     ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
 	KOKKOS_CXXFLAGS +=
 	KOKKOS_LDFLAGS +=
-    else	
-	KOKKOS_CXXFLAGS += -mavx
-	KOKKOS_LDFLAGS += -mavx
+    else
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+		KOKKOS_CXXFLAGS +=
+		KOKKOS_LDFLAGS +=
+	else
+		KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx
+		KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx
+	endif
     endif
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+		KOKKOS_CXXFLAGS += -mavx
+		KOKKOS_LDFLAGS  += -mavx
+	else
+		ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+
+		else
+			ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) 
+				KOKKOS_CXXFLAGS += -tp=sandybridge
+				KOKKOS_LDFLAGS  += -tp=sandybridge
+			else
+				# Assume that this really is a GNU compiler
+				KOKKOS_CXXFLAGS += -mavx
+				KOKKOS_LDFLAGS  += -mavx
+			endif
+		endif
+	endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
-	KOKKOS_LDFLAGS  += -mcpu=power8 -mtune=power8
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) 
+
+	else
+		# Assume that this really is a GNU compiler, or it could be XL on P8
+		KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
+		KOKKOS_LDFLAGS  += -mcpu=power8 -mtune=power8
+	endif
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
@@ -322,7 +463,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
 
 		else
 			ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) 
-
+				KOKKOS_CXXFLAGS += -tp=haswell
+				KOKKOS_LDFLAGS  += -tp=haswell
 			else
 				# Assume that this is a really a GNU compiler
 				KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
@@ -352,52 +494,85 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
 	endif
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+		KOKKOS_CXXFLAGS += -xCORE-AVX512
+		KOKKOS_LDFLAGS  += -xCORE-AVX512
+	else
+		ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+
+		else
+			ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+
+			else
+				# Assume a GNU-compatible compiler (Intel, Cray and PGI are handled above)
+				KOKKOS_CXXFLAGS += -march=skylake-avx512
+				KOKKOS_LDFLAGS  += -march=skylake-avx512
+			endif
+		endif
+	endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
 	KOKKOS_CXXFLAGS += -mmic
 	KOKKOS_LDFLAGS += -mmic
 endif
 
+#Figure out the architecture flag for Cuda
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+  KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch
+endif
+ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+  KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-x cuda --cuda-gpu-arch
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
     tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_30
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
     tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_32
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
     tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_35
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
     tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_37
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
     tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_50
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
     tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_52
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
     tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_53
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
     tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
     tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
-        KOKKOS_CXXFLAGS += -arch=sm_61
+        KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61
+endif
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
+        KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60
 endif
 endif
  
@@ -424,6 +599,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 	KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
 	KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
+	KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
 	KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 
 	KOKKOS_LIBS += -lcudart -lcuda
 endif
@@ -443,7 +619,7 @@ endif
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
 	KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
 	KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
-	ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
 		KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
 	else
 		KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
@@ -451,6 +627,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
 	KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
 endif
 
+#Explicitly set the GCC Toolchain for Clang
+ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+    KOKKOS_INTERNAL_GCC_PATH = $(shell which g++)
+    KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
+    KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC
+    KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
+endif
+
 #With Cygwin functions such as fdopen and fileno are not defined 
 #when strict ansi is enabled. strict ansi gets enabled with --std=c++11
 #though. So we hard undefine it here. Not sure if that has any bad side effects
@@ -471,7 +655,7 @@ KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
 include $(KOKKOS_PATH)/Makefile.targets
 
 kokkos-clean:
-	-rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
+	rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
 
 libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
 	ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets
index 86929ea0fe6e9e2158923e6907c7b2a179e5af61..a48a5f6eb7ea78712b3f6caf695745b4ef18c043 100644
--- a/lib/kokkos/Makefile.targets
+++ b/lib/kokkos/Makefile.targets
@@ -14,20 +14,16 @@ Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
 Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
-Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
-Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
 Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
-Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
+Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
 Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
 Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
-KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
+Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
 Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
 
@@ -38,8 +34,6 @@ Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
 Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
-Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
@@ -47,8 +41,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
 Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
-Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
@@ -67,6 +59,4 @@ endif
 
 Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
-Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
 
diff --git a/lib/kokkos/README b/lib/kokkos/README
index b094578af631b179e9744f744a823a1800bd885b..ffc1fe53b5c6a3d555e61626768df727b51a34a2 100644
--- a/lib/kokkos/README
+++ b/lib/kokkos/README
@@ -45,31 +45,32 @@ Primary tested compilers on X86 are:
   Intel 14.0.4
   Intel 15.0.2
   Intel 16.0.1
+  Intel 17.0.098
   Clang 3.5.2
   Clang 3.6.1
+  Clang 3.9.0
 
 Primary tested compilers on Power 8 are:
-  IBM XL 13.1.3 (OpenMP,Serial)
-  GCC 4.9.2 (OpenMP,Serial)
-  GCC 5.3.0 (OpenMP,Serial)
+  GCC 5.4.0 (OpenMP,Serial)
+  IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
+
+Primary tested compilers on Intel KNL are:
+   Intel 16.2.181 (with gcc 4.7.2)
+   Intel 17.0.098 (with gcc 4.7.2)
 
 Secondary tested compilers are:
-  CUDA 6.5 (with gcc 4.7.2)
   CUDA 7.0 (with gcc 4.7.2)
-  CUDA 7.5 (with gcc 4.8.4)
+  CUDA 7.5 (with gcc 4.7.2)
+  CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8)
+  CUDA/Clang 8.0 using Clang/Trunk compiler
 
 Other compilers working:
   X86:
-   Intel 17.0.042 (the FENL example causes internal compiler error)
    PGI 15.4
    Cygwin 2.1.0 64bit with gcc 4.9.3
-  KNL:
-   Intel 16.2.181 (the FENL example causes internal compiler error)
-   Intel 17.0.042 (the FENL example causes internal compiler error)
 
 Known non-working combinations:
   Power8:
-   GCC 6.1.0
    Pthreads backend
 
 
@@ -92,9 +93,10 @@ master branch, without -Werror and only for a select set of backends.
 
 In the 'example/tutorial' directory you will find step by step tutorial
 examples which explain many of the features of Kokkos. They work with
-simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
+simple Makefiles. To build with g++ and OpenMP simply type 'make'
 in the 'example/tutorial' directory. This will build all examples in the
-subfolders.
+subfolders. To change the build options, refer to the compilation section
+of the Programming Guide.
 
 ============================================================================
 ====Running Unit Tests======================================================
diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
index d7c06dc14be99bc63b8f0170843d81067577771e..78cddeeaecb2e1f8b748b70dcb848e2778fdc7e1 100644
--- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
@@ -476,54 +476,54 @@ namespace Kokkos {
   };
 
   template<class Generator>
-  struct rand<Generator, ::Kokkos::complex<float> > {
+  struct rand<Generator, Kokkos::complex<float> > {
     KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<float> max () {
-      return ::Kokkos::complex<float> (1.0, 1.0);
+    static Kokkos::complex<float> max () {
+      return Kokkos::complex<float> (1.0, 1.0);
     }
     KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<float> draw (Generator& gen) {
+    static Kokkos::complex<float> draw (Generator& gen) {
       const float re = gen.frand ();
       const float im = gen.frand ();
-      return ::Kokkos::complex<float> (re, im);
+      return Kokkos::complex<float> (re, im);
     }
     KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& range) {
+    static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& range) {
       const float re = gen.frand (real (range));
       const float im = gen.frand (imag (range));
-      return ::Kokkos::complex<float> (re, im);
+      return Kokkos::complex<float> (re, im);
     }
     KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& start, const ::Kokkos::complex<float>& end) {
+    static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& start, const Kokkos::complex<float>& end) {
       const float re = gen.frand (real (start), real (end));
       const float im = gen.frand (imag (start), imag (end));
-      return ::Kokkos::complex<float> (re, im);
+      return Kokkos::complex<float> (re, im);
     }
   };
 
   template<class Generator>
-  struct rand<Generator, ::Kokkos::complex<double> > {
+  struct rand<Generator, Kokkos::complex<double> > {
     KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<double> max () {
-      return ::Kokkos::complex<double> (1.0, 1.0);
+    static Kokkos::complex<double> max () {
+      return Kokkos::complex<double> (1.0, 1.0);
     }
     KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<double> draw (Generator& gen) {
+    static Kokkos::complex<double> draw (Generator& gen) {
       const double re = gen.drand ();
       const double im = gen.drand ();
-      return ::Kokkos::complex<double> (re, im);
+      return Kokkos::complex<double> (re, im);
     }
     KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& range) {
+    static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& range) {
       const double re = gen.drand (real (range));
       const double im = gen.drand (imag (range));
-      return ::Kokkos::complex<double> (re, im);
+      return Kokkos::complex<double> (re, im);
     }
     KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& start, const ::Kokkos::complex<double>& end) {
+    static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& start, const Kokkos::complex<double>& end) {
       const double re = gen.drand (real (start), real (end));
       const double im = gen.drand (imag (start), imag (end));
-      return ::Kokkos::complex<double> (re, im);
+      return Kokkos::complex<double> (re, im);
     }
   };
 
@@ -670,8 +670,8 @@ namespace Kokkos {
       double S = 2.0;
       double U;
       while(S>=1.0) {
-        U = drand();
-        const double V = drand();
+        U = 2.0*drand() - 1.0;
+        const double V = 2.0*drand() - 1.0;
         S = U*U+V*V;
       }
       return U*sqrt(-2.0*log(S)/S);
@@ -910,8 +910,8 @@ namespace Kokkos {
       double S = 2.0;
       double U;
       while(S>=1.0) {
-        U = drand();
-        const double V = drand();
+        U = 2.0*drand() - 1.0;
+        const double V = 2.0*drand() - 1.0;
         S = U*U+V*V;
       }
       return U*sqrt(-2.0*log(S)/S);
@@ -1163,8 +1163,8 @@ namespace Kokkos {
       double S = 2.0;
       double U;
       while(S>=1.0) {
-        U = drand();
-        const double V = drand();
+        U = 2.0*drand() - 1.0;
+        const double V = 2.0*drand() - 1.0;
         S = U*U+V*V;
       }
       return U*sqrt(-2.0*log(S)/S);
diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
index 6123ce978c8a385a87ac57bdca45a9ff8517757f..5b8c65fee1869c25681567036314d25beab9a5f2 100644
--- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
@@ -51,7 +51,7 @@
 
 namespace Kokkos {
 
-  namespace SortImpl {
+  namespace Impl {
 
   template<class ValuesViewType, int Rank=ValuesViewType::Rank>
   struct CopyOp;
@@ -199,7 +199,7 @@ public:
 
     parallel_for(values.dimension_0(),
         bin_sort_sort_functor<ValuesViewType, offset_type,
-                              SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
+                              Impl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
 
     deep_copy(values,sorted_values);
   }
@@ -262,17 +262,15 @@ public:
   }
 };
 
-namespace SortImpl {
-
 template<class KeyViewType>
-struct DefaultBinOp1D {
+struct BinOp1D {
   const int max_bins_;
   const double mul_;
   typename KeyViewType::const_value_type range_;
   typename KeyViewType::const_value_type min_;
 
   //Construct BinOp with number of bins, minimum value and maxuimum value
-  DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
+  BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
                                typename KeyViewType::const_value_type max )
      :max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {}
 
@@ -298,13 +296,13 @@ struct DefaultBinOp1D {
 };
 
 template<class KeyViewType>
-struct DefaultBinOp3D {
+struct BinOp3D {
   int max_bins_[3];
   double mul_[3];
   typename KeyViewType::non_const_value_type range_[3];
   typename KeyViewType::non_const_value_type min_[3];
 
-  DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
+  BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
                                typename KeyViewType::const_value_type max[] )
   {
     max_bins_[0] = max_bins__[0]+1;
@@ -348,109 +346,11 @@ struct DefaultBinOp3D {
   }
 };
 
-template<typename Scalar>
-struct min_max {
-  Scalar min;
-  Scalar max;
-  bool init;
-
-  KOKKOS_INLINE_FUNCTION
-  min_max() {
-    min = 0;
-    max = 0;
-    init = 0;
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  min_max (const min_max& val) {
-    min = val.min;
-    max = val.max;
-    init = val.init;
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  min_max operator = (const min_max& val) {
-    min = val.min;
-    max = val.max;
-    init = val.init;
-    return *this;
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator+= (const Scalar& val) {
-    if(init) {
-      min = min<val?min:val;
-      max = max>val?max:val;
-    } else {
-      min = val;
-      max = val;
-      init = 1;
-    }
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator+= (const min_max& val) {
-    if(init && val.init) {
-      min = min<val.min?min:val.min;
-      max = max>val.max?max:val.max;
-    } else {
-      if(val.init) {
-        min = val.min;
-        max = val.max;
-        init = 1;
-      }
-    }
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator+= (volatile const Scalar& val) volatile {
-    if(init) {
-      min = min<val?min:val;
-      max = max>val?max:val;
-    } else {
-      min = val;
-      max = val;
-      init = 1;
-    }
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator+= (volatile const min_max& val) volatile {
-    if(init && val.init) {
-      min = min<val.min?min:val.min;
-      max = max>val.max?max:val.max;
-    } else {
-      if(val.init) {
-        min = val.min;
-        max = val.max;
-        init = 1;
-      }
-    }
-  }
-};
-
-
-template<class ViewType>
-struct min_max_functor {
-  typedef typename ViewType::execution_space execution_space;
-  ViewType view;
-  typedef min_max<typename ViewType::non_const_value_type> value_type;
-  min_max_functor (const ViewType view_):view(view_) {
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(const size_t& i, value_type& val) const {
-    val += view(i);
-  }
-};
+namespace Impl {
 
 template<class ViewType>
 bool try_std_sort(ViewType view) {
   bool possible = true;
-#if ! KOKKOS_USING_EXP_VIEW
-  size_t stride[8];
-  view.stride(stride);
-#else
   size_t stride[8] = { view.stride_0()
                      , view.stride_1()
                      , view.stride_2()
@@ -460,8 +360,7 @@ bool try_std_sort(ViewType view) {
                      , view.stride_6()
                      , view.stride_7()
                      };
-#endif
-  possible  = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
+  possible  = possible && std::is_same<typename ViewType::memory_space, HostSpace>::value;
   possible  = possible && (ViewType::Rank == 1);
   possible  = possible && (stride[0] == 1);
   if(possible)  {
@@ -470,27 +369,39 @@ bool try_std_sort(ViewType view) {
   return possible;
 }
 
+template<class ViewType>
+struct min_max_functor { // Reduction functor: folds view(i) into a running min/max pair.
+  typedef Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> minmax_scalar;
+  // View whose entries are scanned; copy-initialized from the constructor argument.
+  ViewType view;
+  min_max_functor(const ViewType& view_):view(view_) {}
+  // Widens [minmax.min_val, minmax.max_val] so that it also contains view(i).
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const size_t& i, minmax_scalar& minmax) const {
+    if(view(i) < minmax.min_val) minmax.min_val = view(i);
+    if(view(i) > minmax.max_val) minmax.max_val = view(i);
+  }
+};
+
 }
 
 template<class ViewType>
 void sort(ViewType view, bool always_use_kokkos_sort = false) {
   if(!always_use_kokkos_sort) {
-    if(SortImpl::try_std_sort(view)) return;
+    if(Impl::try_std_sort(view)) return;
   }
-
-  typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
-  SortImpl::min_max<typename ViewType::non_const_value_type> val;
-  parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
-  BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
+  typedef BinOp1D<ViewType> CompType;
+
+  Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
+  Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
+  parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.dimension_0()),
+                  Impl::min_max_functor<ViewType>(view),reducer);
+  if(result.min_val == result.max_val) return;
+  BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,result.min_val,result.max_val),true);
   bin_sort.create_permute_vector();
   bin_sort.sort(view);
 }
 
-/*template<class ViewType, class Comparator>
-void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
-
-}*/
-
 }
 
 #endif
diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
index 654104b44e7b395c6937f4c1dc35b4933018268e..fde6b967e06931ad5fd04f38ac2eba20ca654a9e 100644
--- a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
+++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
@@ -1,6 +1,6 @@
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
 
 SET(SOURCES
diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile
index 5d79364c52abc7a8a61769d187fc06e5612e203b..3027c6a94b9826ba9da11648539dc5c83bebaa77 100644
--- a/lib/kokkos/algorithms/unit_tests/Makefile
+++ b/lib/kokkos/algorithms/unit_tests/Makefile
@@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests
 default: build_all
 	echo "End Build"
 
-
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+  CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
 else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?= -lpthread
+  CXX = g++
 endif
 
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?= -lpthread
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests
 
 TEST_TARGETS = 
diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp
index ccbcbdd0011bbc577ac8c39b2f593ed35f2546ac..03e4fb691ef1a4ae6a7bed6471ccba4e3fd53762 100644
--- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp
@@ -131,6 +131,10 @@ void test_1D_sort(unsigned int n,bool force_kokkos) {
   typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
   KeyViewType keys("Keys",n);
 
+  // Test sorting array with all numbers equal
+  Kokkos::deep_copy(keys,KeyType(1));
+  Kokkos::sort(keys,force_kokkos);
+
   Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
   Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
 
@@ -174,7 +178,7 @@ void test_3D_sort(unsigned int n) {
   typename KeyViewType::value_type min[3] = {0,0,0};
   typename KeyViewType::value_type max[3] = {100,100,100};
 
-  typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
+  typedef Kokkos::BinOp3D< KeyViewType > BinOp;
   BinOp bin_op(bin_max,min,max);
   Kokkos::BinSort< KeyViewType , BinOp >
     Sorter(keys,bin_op,false);
diff --git a/lib/kokkos/benchmarks/bytes_and_flops/Makefile b/lib/kokkos/benchmarks/bytes_and_flops/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..6a1917a523170bb392c6e81855e60489085bf113
--- /dev/null
+++ b/lib/kokkos/benchmarks/bytes_and_flops/Makefile
@@ -0,0 +1,43 @@
+KOKKOS_PATH = ${HOME}/kokkos
+SRC = $(wildcard *.cpp)
+KOKKOS_DEVICES=Cuda
+KOKKOS_CUDA_OPTIONS=enable_lambda
+# NOTE(review): make runs the echo below only after the 'build' prerequisite is up to date, so "Start Build" is printed after the build.
+default: build
+	echo "Start Build"
+# Compiler, executable name, devices and architectures depend on whether KOKKOS_DEVICES contains "Cuda" (set above unless overridden on the make command line).
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
+EXE = bytes_and_flops.cuda
+KOKKOS_DEVICES = "Cuda,OpenMP"
+KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+EXE = bytes_and_flops.host
+KOKKOS_DEVICES = "OpenMP"
+KOKKOS_ARCH = "SNB"
+endif
+
+CXXFLAGS = -O3 -g
+
+DEPFLAGS = -M
+LINK = ${CXX}
+LINKFLAGS =  
+# One object file per .cpp found in this directory.
+OBJ = $(SRC:.cpp=.o)
+LIB =
+# Makefile.kokkos presumably provides the KOKKOS_* variables and the kokkos-clean target used below - TODO confirm.
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+build: $(EXE)
+# Link all objects into $(EXE).
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean: kokkos-clean 
+	rm -f *.o *.cuda *.host
+
+# Compilation rules
+# Every object also lists the three bench headers as prerequisites, so editing any of them rebuilds all objects.
+%.o:%.cpp $(KOKKOS_CPP_DEPENDS) bench.hpp bench_unroll_stride.hpp bench_stride.hpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e3fe42a652dfaa963578052664a8df71e03afce1
--- /dev/null
+++ b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp
@@ -0,0 +1,99 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<Kokkos_Core.hpp>
+#include<impl/Kokkos_Timer.hpp>
+
+template<class Scalar, int Unroll,int Stride>
+struct Run { // Primary template, declaration only; bench_unroll_stride.hpp supplies one partial specialization per (UNROLL,STRIDE) pair.
+static void run(int N, int K, int R, int F, int T, int S);
+};
+
+template<class Scalar, int Stride>
+struct RunStride { // Primary template, declaration only; bench_stride.hpp supplies one partial specialization per STRIDE value.
+static void run_1(int N, int K, int R, int F, int T, int S); // run_<u>: fixed unroll factor u, forwarded to Run<Scalar,u,STRIDE>::run by the specialization
+static void run_2(int N, int K, int R, int F, int T, int S);
+static void run_3(int N, int K, int R, int F, int T, int S);
+static void run_4(int N, int K, int R, int F, int T, int S);
+static void run_5(int N, int K, int R, int F, int T, int S);
+static void run_6(int N, int K, int R, int F, int T, int S);
+static void run_7(int N, int K, int R, int F, int T, int S);
+static void run_8(int N, int K, int R, int F, int T, int S);
+static void run(int N, int K, int R, int U, int F, int T, int S); // takes the unroll factor U at run time and picks one of run_1..run_8
+};
+
+#define STRIDE 1 // bench_stride.hpp is re-included once per STRIDE value (1,2,4,8,16,32); each pass defines RunStride<Scalar,STRIDE>
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 2
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 4
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 8
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 16
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 32
+#include<bench_stride.hpp>
+#undef STRIDE
+
+template<class Scalar>
+void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S) {
+  // Map the runtime stride D onto the matching compile-time RunStride
+  // instantiation. Only 1, 2, 4, 8, 16 and 32 are instantiated; any other
+  // value of D selects nothing and the call returns without running a kernel.
+  switch(D) {
+    case 1:  RunStride<Scalar,1>::run(N,K,R,U,F,T,S);  break;
+    case 2:  RunStride<Scalar,2>::run(N,K,R,U,F,T,S);  break;
+    case 4:  RunStride<Scalar,4>::run(N,K,R,U,F,T,S);  break;
+    case 8:  RunStride<Scalar,8>::run(N,K,R,U,F,T,S);  break;
+    case 16: RunStride<Scalar,16>::run(N,K,R,U,F,T,S); break;
+    case 32: RunStride<Scalar,32>::run(N,K,R,U,F,T,S); break;
+    default: break;
+  }
+}
+
diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b60ec849944b9415d4ad6e1cd3a627cafdc55854
--- /dev/null
+++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp
@@ -0,0 +1,124 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+
+#define UNROLL 1 // bench_unroll_stride.hpp is re-included once per UNROLL value 1..8; each pass defines Run<Scalar,UNROLL,STRIDE> for the STRIDE set by the includer
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 2
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 3
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 4
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 5
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 6
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 7
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 8
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+
+template<class Scalar>
+struct RunStride<Scalar,STRIDE> {
+// Specialization for the STRIDE macro value active when this header is included.
+// run_1..run_8 fix the unroll factor at compile time by forwarding to the
+// matching Run<Scalar,u,STRIDE>::run.
+static void run_1(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,1,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_2(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,2,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_3(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,3,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_4(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,4,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_5(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,5,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_6(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,6,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_7(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,7,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_8(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,8,STRIDE>::run(N,K,R,F,T,S);
+}
+
+// Runtime dispatch on the unroll factor U: picks the wrapper whose
+// compile-time unroll factor equals U. Values of U outside 1..8 match no
+// case, so no kernel is run for them.
+static void run(int N, int K, int R, int U, int F, int T, int S) {
+  switch(U) {
+    case 1:
+      run_1(N,K,R,F,T,S); break;
+    case 2:
+      run_2(N,K,R,F,T,S); break;
+    case 3:
+      run_3(N,K,R,F,T,S); break;
+    case 4:
+      run_4(N,K,R,F,T,S); break;
+    case 5:
+      run_5(N,K,R,F,T,S); break;
+    case 6:
+      run_6(N,K,R,F,T,S); break;
+    case 7:
+      run_7(N,K,R,F,T,S); break;
+    case 8:
+      run_8(N,K,R,F,T,S); break;
+  }
+}
+};
+
diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0992c5b54b6277f99d728710a37182695d3a6f92
--- /dev/null
+++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp
@@ -0,0 +1,148 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+template<class Scalar>
+struct Run<Scalar,UNROLL,STRIDE> { // Specialization for the UNROLL and STRIDE macro values active when this header is included.
+static void run(int N, int K, int R, int F, int T, int S) {
+  Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> A("A",N,K);
+  Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> B("B",N,K);
+  Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> C("C",N,K);
+  // Fill all three views with constants; the kernel below only touches index 0 of the trailing STRIDE-sized dimension.
+  Kokkos::deep_copy(A,Scalar(1.5));
+  Kokkos::deep_copy(B,Scalar(2.5));
+  Kokkos::deep_copy(C,Scalar(3.5));
+  // League of N teams with team size T; a level-0 per-team scratch request of S is made but the scratch is never accessed below.
+  Kokkos::Timer timer;
+  Kokkos::parallel_for("BenchmarkKernel",Kokkos::TeamPolicy<>(N,T).set_scratch_size(0,Kokkos::PerTeam(S)),
+    KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type& team) {
+    const int n = team.league_rank();
+    for(int r=0; r<R; r++) {
+      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,K), [&] (const int& i) {
+        Scalar a1 = A(n,i,0); 
+        const Scalar b = B(n,i,0);
+#if(UNROLL>1)
+        Scalar a2 = a1*1.3;
+#endif
+#if(UNROLL>2)
+        Scalar a3 = a2*1.1;
+#endif
+#if(UNROLL>3)
+        Scalar a4 = a3*1.1;
+#endif
+#if(UNROLL>4)
+        Scalar a5 = a4*1.3;
+#endif
+#if(UNROLL>5)
+        Scalar a6 = a5*1.1;
+#endif
+#if(UNROLL>6)
+        Scalar a7 = a6*1.1;
+#endif
+#if(UNROLL>7)
+        Scalar a8 = a7*1.1;
+#endif
+        // F rounds of a += b*a on each of the UNROLL accumulators; within a round
+        // no accumulator reads another one.
+        for(int f = 0; f<F; f++) {
+          a1 += b*a1;
+#if(UNROLL>1)
+          a2 += b*a2;
+#endif
+#if(UNROLL>2)
+          a3 += b*a3;
+#endif
+#if(UNROLL>3)
+          a4 += b*a4;
+#endif
+#if(UNROLL>4)
+          a5 += b*a5;
+#endif
+#if(UNROLL>5)
+          a6 += b*a6;
+#endif
+#if(UNROLL>6)
+          a7 += b*a7;
+#endif
+#if(UNROLL>7)
+          a8 += b*a8;
+#endif
+
+        // All accumulators are summed into C below, presumably so the compiler cannot discard the arithmetic.
+        }
+#if(UNROLL==1)
+        C(n,i,0) = a1; 
+#endif
+#if(UNROLL==2)
+        C(n,i,0) = a1+a2; 
+#endif
+#if(UNROLL==3)
+        C(n,i,0) = a1+a2+a3; 
+#endif
+#if(UNROLL==4)
+        C(n,i,0) = a1+a2+a3+a4; 
+#endif
+#if(UNROLL==5)
+        C(n,i,0) = a1+a2+a3+a4+a5;
+#endif
+#if(UNROLL==6)
+        C(n,i,0) = a1+a2+a3+a4+a5+a6;
+#endif
+#if(UNROLL==7)
+        C(n,i,0) = a1+a2+a3+a4+a5+a6+a7;
+#endif
+#if(UNROLL==8)
+        C(n,i,0) = a1+a2+a3+a4+a5+a6+a7+a8;
+#endif
+
+      });
+    }
+  });
+  Kokkos::fence(); 
+  double seconds = timer.seconds(); // the timer is created just before the launch and read after the fence
+  // bytes: three Scalar-sized accesses (A read, B read, C write) per (n,i) element per repetition r.
+  double bytes = 1.0*N*K*R*3*sizeof(Scalar);
+  double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1)); // F rounds of UNROLL multiply-adds, plus (UNROLL-1) setup multiplies and (UNROLL-1) final adds
+  printf("NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf\n",N,K,R,UNROLL,F,T,S,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds);
+}
+};
+
diff --git a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f545247212ab6057baca8bfb39463daa760747db
--- /dev/null
+++ b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp
@@ -0,0 +1,96 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<Kokkos_Core.hpp>
+#include<impl/Kokkos_Timer.hpp>
+#include<bench.hpp>
+
+int main(int argc, char* argv[]) {
+  Kokkos::initialize();
+  // Benchmark driver: expects nine positional integers (P N K R D U F T S),
+  // validates P, D and U, then runs the kernel in float or double precision.
+  if(argc<10) {
+    printf("Arguments: P N K R D U F T S\n");
+    printf("  P:   Precision (1==float, 2==double)\n");
+    printf("  N,K: dimensions of the 2D array to allocate\n");
+    printf("  R:   how often to loop through the K dimension with each team\n");
+    printf("  D:   distance between loaded elements (stride)\n");
+    printf("  U:   how many independent flops to do per load\n");
+    printf("  F:   how many times to repeat the U unrolled operations before reading next element\n");
+    printf("  T:   team size\n");
+    printf("  S:   shared memory per team (used to control occupancy on GPUs)\n");
+    printf("Example Input GPU:\n");
+    printf("  Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n");
+    printf("  Cache Bound     : 2 100000 1024 64 1 1 1 512 20000\n");
+    printf("  Compute Bound   : 2 100000 1024 1 1 8 64 256 6000\n");
+    printf("  Load Slots Used : 2 20000 256 32 16 1 1 256 6000\n");
+    printf("  Inefficient Load: 2 20000 256 32 2 1 1 256 20000\n");
+    Kokkos::finalize();
+    return 0;
+  }
+
+  // atoi yields 0 for non-numeric text; the checks below reject 0 for P, D and U.
+  int P = atoi(argv[1]);
+  int N = atoi(argv[2]);
+  int K = atoi(argv[3]);
+  int R = atoi(argv[4]);
+  int D = atoi(argv[5]);
+  int U = atoi(argv[6]);
+  int F = atoi(argv[7]);
+  int T = atoi(argv[8]);
+  int S = atoi(argv[9]);
+  // Kokkos is already initialized here, so every rejection path finalizes it before returning.
+  if( (U<1) || (U>8) ) {printf("U must be 1-8\n"); Kokkos::finalize(); return 0;}
+  if( (D!=1) && (D!=2) && (D!=4) && (D!=8) && (D!=16) && (D!=32)) {printf("D must be one of 1,2,4,8,16,32\n"); Kokkos::finalize(); return 0;}
+  if( (P!=1) && (P!=2) ) {printf("P must be one of 1,2\n"); Kokkos::finalize(); return 0;}
+
+  if(P==1) {
+    run_stride_unroll<float>(N,K,R,D,U,F,T,S);
+  }
+  if(P==2) {
+    run_stride_unroll<double>(N,K,R,D,U,F,T,S);
+  }
+
+  Kokkos::finalize();
+}
+
diff --git a/lib/kokkos/benchmarks/gather/Makefile b/lib/kokkos/benchmarks/gather/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..fd1feab6fa8c22d13c726dad7312e769bbdffc37
--- /dev/null
+++ b/lib/kokkos/benchmarks/gather/Makefile
@@ -0,0 +1,44 @@
+KOKKOS_PATH = ${HOME}/kokkos
+SRC = $(wildcard *.cpp)
+KOKKOS_DEVICES=Cuda
+KOKKOS_CUDA_OPTIONS=enable_lambda
+# NOTE(review): make runs the echo below only after the 'build' prerequisite is up to date, so "Start Build" is printed after the build.
+default: build
+	echo "Start Build"
+# Compiler, executable name, devices and architectures depend on whether KOKKOS_DEVICES contains "Cuda" (set above unless overridden on the make command line).
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
+EXE = gather.cuda
+KOKKOS_DEVICES = "Cuda,OpenMP"
+KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+EXE = gather.host
+KOKKOS_DEVICES = "OpenMP"
+KOKKOS_ARCH = "SNB"
+endif
+
+CXXFLAGS = -O3 -g
+
+DEPFLAGS = -M
+LINK = ${CXX}
+LINKFLAGS =  
+# One object file per .cpp found in this directory.
+OBJ = $(SRC:.cpp=.o)
+LIB =
+# Makefile.kokkos presumably provides the KOKKOS_* variables and the kokkos-clean target used below - TODO confirm.
+include $(KOKKOS_PATH)/Makefile.kokkos
+# NOTE(review): the $(warning ...) below prints KOKKOS_CPPFLAGS each time this makefile is read; looks like leftover debugging output.
+$(warning ${KOKKOS_CPPFLAGS})
+build: $(EXE)
+# Link all objects into $(EXE).
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean: kokkos-clean 
+	rm -f *.o *.cuda *.host
+
+# Compilation rules
+# Every object also lists both gather headers as prerequisites, so editing either of them rebuilds all objects.
+%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp 
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
diff --git a/lib/kokkos/benchmarks/gather/gather.hpp b/lib/kokkos/benchmarks/gather/gather.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..406bd28983bd696e12cb82aeea388f94b6e80047
--- /dev/null
+++ b/lib/kokkos/benchmarks/gather/gather.hpp
@@ -0,0 +1,92 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+template<class Scalar, int UNROLL>
+struct RunGather {
+  static void run(int N, int K, int D, int R, int F);
+};
+
+#define UNROLL 1
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 2
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 3
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 4
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 5
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 6
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 7
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 8
+#include<gather_unroll.hpp>
+#undef UNROLL
+
+// Dispatch the runtime unroll factor U to the matching RunGather<Scalar,U>
+// specialization; an unsupported U is reported instead of silently ignored.
+template<class Scalar>
+void run_gather_test(int N, int K, int D, int R, int U, int F) {
+  switch(U) {
+    case 1: RunGather<Scalar,1>::run(N,K,D,R,F); break;
+    case 2: RunGather<Scalar,2>::run(N,K,D,R,F); break;
+    case 3: RunGather<Scalar,3>::run(N,K,D,R,F); break;
+    case 4: RunGather<Scalar,4>::run(N,K,D,R,F); break;
+    case 5: RunGather<Scalar,5>::run(N,K,D,R,F); break;
+    case 6: RunGather<Scalar,6>::run(N,K,D,R,F); break;
+    case 7: RunGather<Scalar,7>::run(N,K,D,R,F); break;
+    case 8: RunGather<Scalar,8>::run(N,K,D,R,F); break;
+    // Only UNROLL values 1..8 are instantiated by the includes above.
+    default:
+      printf("U must be 1-8\n");
+      break;
+  }
+}
diff --git a/lib/kokkos/benchmarks/gather/gather_unroll.hpp b/lib/kokkos/benchmarks/gather/gather_unroll.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1d01b26ca77f2b7e58f5a1cbd5bbe251c3d558d3
--- /dev/null
+++ b/lib/kokkos/benchmarks/gather/gather_unroll.hpp
@@ -0,0 +1,169 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<Kokkos_Core.hpp>
+#include<Kokkos_Random.hpp>
+
+template<class Scalar>
+struct RunGather<Scalar,UNROLL> { // specialization for the UNROLL value #defined by the including file
+static void run(int N, int K, int D, int R, int F) { // N: entities, K: gathers per entity, D: bound passed to rand() for the neighbor offset, R: kernel launches, F: inner repeats
+  Kokkos::View<int**> connectivity("Connectivity",N,K);
+  Kokkos::View<Scalar*> A_in("Input",N);
+  Kokkos::View<Scalar*> B_in("Input",N);
+  Kokkos::View<Scalar*> C("Output",N);
+
+  Kokkos::Random_XorShift64_Pool<> rand_pool(12313);
+
+  Kokkos::deep_copy(A_in,1.5);
+  Kokkos::deep_copy(B_in,2.0);
+
+  Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > A(A_in);
+  Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > B(B_in);
+  // Each entity i gets K neighbor indices: i plus a random offset (rand(D) - D/2), wrapped modulo N.
+  Kokkos::parallel_for("InitKernel",N,
+      KOKKOS_LAMBDA (const int& i) {
+    auto rand_gen = rand_pool.get_state();
+    for( int jj=0; jj<K; jj++) {
+      connectivity(i,jj) = (rand_gen.rand(D) + i - D/2 + N)%N;
+    }
+    rand_pool.free_state(rand_gen);
+  });
+  Kokkos::fence();
+
+  // Timed region: R launches of the gather kernel, each followed by a fence.
+  Kokkos::Timer timer;
+  for(int r = 0; r<R; r++) {
+  Kokkos::parallel_for("BenchmarkKernel",N,
+      KOKKOS_LAMBDA (const int& i) {
+      Scalar c = Scalar(0.0);
+      for( int jj=0; jj<K; jj++) {
+        const int j = connectivity(i,jj);
+        Scalar a1 = A(j);
+        const Scalar b = B(j);
+#if(UNROLL>1)
+        Scalar a2 = a1*Scalar(1.3);
+#endif
+#if(UNROLL>2)
+        Scalar a3 = a2*Scalar(1.1);
+#endif
+#if(UNROLL>3)
+        Scalar a4 = a3*Scalar(1.1);
+#endif
+#if(UNROLL>4)
+        Scalar a5 = a4*Scalar(1.3);
+#endif
+#if(UNROLL>5)
+        Scalar a6 = a5*Scalar(1.1);
+#endif
+#if(UNROLL>6)
+        Scalar a7 = a6*Scalar(1.1);
+#endif
+#if(UNROLL>7)
+        Scalar a8 = a7*Scalar(1.1);
+#endif
+
+        // F rounds of one multiply-add per active accumulator (a_k += b*a_k).
+        for(int f = 0; f<F; f++) {
+          a1 += b*a1;
+#if(UNROLL>1)
+          a2 += b*a2;
+#endif
+#if(UNROLL>2)
+          a3 += b*a3;
+#endif
+#if(UNROLL>3)
+          a4 += b*a4;
+#endif
+#if(UNROLL>4)
+          a5 += b*a5;
+#endif
+#if(UNROLL>5)
+          a6 += b*a6;
+#endif
+#if(UNROLL>6)
+          a7 += b*a7;
+#endif
+#if(UNROLL>7)
+          a8 += b*a8;
+#endif
+
+
+        }
+#if(UNROLL==1)
+        c += a1;
+#endif
+#if(UNROLL==2)
+        c += a1+a2;
+#endif
+#if(UNROLL==3)
+        c += a1+a2+a3;
+#endif
+#if(UNROLL==4)
+        c += a1+a2+a3+a4;
+#endif
+#if(UNROLL==5)
+        c += a1+a2+a3+a4+a5;
+#endif
+#if(UNROLL==6)
+        c += a1+a2+a3+a4+a5+a6;
+#endif
+#if(UNROLL==7)
+        c += a1+a2+a3+a4+a5+a6+a7;
+#endif
+#if(UNROLL==8)
+        c += a1+a2+a3+a4+a5+a6+a7+a8;
+#endif
+
+      }
+      C(i) = c ;
+  });
+  Kokkos::fence();
+  }
+  double seconds = timer.seconds();
+  // Derived rates; sizeof(Scalar)/4 is a size_t, so it is cast to int to match the %i conversion below.
+  double bytes = 1.0*N*K*R*(2*sizeof(Scalar)+sizeof(int)) + 1.0*N*R*sizeof(Scalar);
+  double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1));
+  double gather_ops = 1.0*N*K*R*2;
+  printf("SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf GGather/s: %lf\n",static_cast<int>(sizeof(Scalar)/4),N,K,D,R,UNROLL,F,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds,1.e-9*gather_ops/seconds);
+}
+};
diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp b/lib/kokkos/benchmarks/gather/main.cpp
similarity index 54%
rename from lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp
rename to lib/kokkos/benchmarks/gather/main.cpp
index 4eb80d03f1fa0c26a2ba9524b16719dcf2a72e99..161c6f20919639845adecd96d74d978c65ea952f 100644
--- a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp
+++ b/lib/kokkos/benchmarks/gather/main.cpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,73 +36,58 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
 
-#include <Kokkos_HostSpace.hpp>
-
-#include <impl/Kokkos_HBWAllocators.hpp>
-#include <impl/Kokkos_Error.hpp>
-
-
-#include <stdint.h>    // uintptr_t
-#include <cstdlib>     // for malloc, realloc, and free
-#include <cstring>     // for memcpy
+#include<Kokkos_Core.hpp>
+#include<impl/Kokkos_Timer.hpp>
+#include<gather.hpp>
 
-#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
-#include <sys/mman.h>  // for mmap, munmap, MAP_ANON, etc
-#include <unistd.h>    // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
-#endif
+int main(int argc, char* argv[]) { // gather benchmark driver; expects 7 arguments: S N K D R U F
+  Kokkos::initialize(argc,argv);
 
-#include <sstream>
-#include <iostream>
 
-#ifdef KOKKOS_HAVE_HBWSPACE
-#include <memkind.h>
+  if(argc<8) {
+    printf("Arguments: S N K D R U F\n");
+    printf("  S:   Scalar Type Size (1==float, 2==double, 4=complex<double>)\n");
+    printf("  N:   Number of entities\n");
+    printf("  K:   Number of things to gather per entity\n");
+    printf("  D:   Max distance of gathered things of an entity\n");
+    printf("  R:   how often to loop through the K dimension with each team\n");
+    printf("  U:   how many independent flops to do per load\n");
+    printf("  F:   how many times to repeat the U unrolled operations before reading next element\n");
+    printf("Example Input GPU:\n");
+    printf("  Bandwidth Bound : 2 10000000 1 1 10 1 1\n");
+    printf("  Cache Bound     : 2 10000000 64 1 10 1 1\n");
+    printf("  Cache Gather    : 2 10000000 64 256 10 1 1\n");
+    printf("  Global Gather   : 2 100000000 16 100000000 1 1 1\n");
+    printf("  Typical MD      : 2 100000 32 512 1000 8 2\n");
+    Kokkos::finalize();
+    return 0;
+  }
 
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
-/*--------------------------------------------------------------------------*/
 
-void* HBWMallocAllocator::allocate( size_t size )
-{
-  std::cout<< "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl;
-  void * ptr = NULL;
-  if (size) {
-    ptr = memkind_malloc(MEMKIND_TYPE,size);
+  int S = atoi(argv[1]);
+  int N = atoi(argv[2]);
+  int K = atoi(argv[3]);
+  int D = atoi(argv[4]);
+  int R = atoi(argv[5]);
+  int U = atoi(argv[6]);
+  int F = atoi(argv[7]);
 
-    if (!ptr)
-    {
-      std::ostringstream msg ;
-      msg << name() << ": allocate(" << size << ") FAILED";
-      Kokkos::Impl::throw_runtime_exception( msg.str() );
-    }
+  if( (S!=1) && (S!=2) && (S!=4)) {printf("S must be one of 1,2,4\n"); Kokkos::finalize(); return 0;}
+  if( N<D ) {printf("N must be larger or equal to D\n"); Kokkos::finalize(); return 0; }
+  if(S==1) {
+    run_gather_test<float>(N,K,D,R,U,F);
   }
-  return ptr;
-}
-
-void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
-{
-  if (ptr) {
-    memkind_free(MEMKIND_TYPE,ptr);
+  if(S==2) {
+    run_gather_test<double>(N,K,D,R,U,F);
   }
-}
-
-void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
-{
-  void * ptr = memkind_realloc(MEMKIND_TYPE, old_ptr, new_size);
-
-  if (new_size > 0u && ptr == NULL) {
-    Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
+  if(S==4) {
+    run_gather_test<Kokkos::complex<double> >(N,K,D,R,U,F);
   }
-  return ptr;
+  Kokkos::finalize();
 }
 
-} // namespace Impl
-} // namespace Experimental
-} // namespace Kokkos
-#endif
diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper
new file mode 100755
index 0000000000000000000000000000000000000000..cb206cf88b2c4e3a4f289bc919cc272e22749f36
--- /dev/null
+++ b/lib/kokkos/bin/nvcc_wrapper
@@ -0,0 +1,284 @@
+#!/bin/bash
+#
+# This shell script (nvcc_wrapper) wraps both the host compiler and
+# NVCC, if you are building legacy C or C++ code with CUDA enabled.
+# The script remedies some differences between the interface of NVCC
+# and that of the host compiler, in particular for linking.
+# It also means that a legacy code doesn't need separate .cu files;
+# it can just use .cpp files.
+#
+# Default settings: change those according to your machine.  For
+# example, you may have two different wrappers with either icpc
+# or g++ as their back-end compiler.  The defaults can be overwritten
+# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
+
+default_arch="sm_35"
+#default_arch="sm_50"
+
+#
+# The default C++ compiler.
+#
+host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
+#host_compiler="icpc"
+#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
+#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
+
+#
+# Internal variables
+#
+
+# C++ files
+cpp_files=""
+
+# Host compiler arguments
+xcompiler_args=""
+
+# Cuda (NVCC) only arguments
+cuda_args=""
+
+# Arguments for both NVCC and Host compiler
+shared_args=""
+
+# Linker arguments
+xlinker_args=""
+
+# Object files passable to NVCC
+object_files=""
+
+# Link objects for the host linker only
+object_files_xlinker=""
+
+# Shared libraries with version numbers are not handled correctly by NVCC
+shared_versioned_libraries_host=""
+shared_versioned_libraries=""
+
+# Does the User set the architecture 
+arch_set=0
+
+# Does the user overwrite the host compiler
+ccbin_set=0
+
+#Error code of compilation
+error_code=0
+
+# Do a dry run without actually compiling
+dry_run=0
+
+# Skip NVCC compilation and use host compiler directly
+host_only=0
+
+# Enable workaround for CUDA 6.5 for pragma ident 
+replace_pragma_ident=0
+
+# Mark first host compiler argument
+first_xcompiler_arg=1
+
+temp_dir=${TMPDIR:-/tmp}
+
+# Check if we have an optimization argument already
+optimization_applied=0
+
+#echo "Arguments: $# $@"
+
+while [ $# -gt 0 ]
+do
+  case $1 in
+  #show the executed command
+  --show|--nvcc-wrapper-show)
+    dry_run=1
+    ;;
+  #run host compilation only
+  --host-only)
+    host_only=1
+    ;;
+  #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
+  --replace-pragma-ident)
+    replace_pragma_ident=1
+    ;;
+  #handle source files to be compiled as cuda files
+  *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
+    cpp_files="$cpp_files $1"
+    ;;
+   # Ensure we only have one optimization flag because NVCC doesn't allow multiple
+  -O*)
+    if [ $optimization_applied -eq 1 ]; then
+       echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
+    else
+       shared_args="$shared_args $1"
+       optimization_applied=1
+    fi
+    ;;
+  #Handle shared args (valid for both nvcc and the host compiler)
+  -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
+    shared_args="$shared_args $1"
+    ;;
+  #Handle shared args that have an argument
+  -o|-MT)
+    shared_args="$shared_args $1 $2"
+    shift
+    ;;
+  #Handle known nvcc args
+  -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
+    cuda_args="$cuda_args $1"
+    ;;
+  #Handle more known nvcc args
+  --expt-extended-lambda|--expt-relaxed-constexpr)
+    cuda_args="$cuda_args $1"
+    ;;
+  #Handle known nvcc args that have an argument
+  -rdc|-maxrregcount|--default-stream)
+    cuda_args="$cuda_args $1 $2"
+    shift
+    ;;
+  #Handle c++11 setting
+  --std=c++11|-std=c++11)
+    shared_args="$shared_args $1"
+    ;;
+  #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
+  -std=c++98|--std=c++98)
+    ;;
+  #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor
+  -pedantic|-Wpedantic|-ansi)
+    ;;
+  #strip -Xcompiler because we add it
+  -Xcompiler)
+    if [ $first_xcompiler_arg -eq 1 ]; then
+      xcompiler_args="$2"
+      first_xcompiler_arg=0
+    else
+      xcompiler_args="$xcompiler_args,$2"
+    fi
+    shift
+    ;;
+  #strip of "-x cu" because we add that
+  -x)
+    if [[ $2 != "cu" ]]; then
+      if [ $first_xcompiler_arg -eq 1 ]; then
+        xcompiler_args="-x,$2"
+        first_xcompiler_arg=0
+      else
+        xcompiler_args="$xcompiler_args,-x,$2"
+      fi
+    fi
+    shift
+    ;;
+  #Handle -ccbin (if its not set we can set it to a default value)
+  -ccbin)
+    cuda_args="$cuda_args $1 $2"
+    ccbin_set=1
+    host_compiler=$2
+    shift
+    ;;
+  #Handle -arch argument (if it's not set, use a default)
+  -arch*)
+    cuda_args="$cuda_args $1"
+    arch_set=1
+    ;;
+  #Handle -Xcudafe argument
+  -Xcudafe)
+    cuda_args="$cuda_args -Xcudafe $2"
+    shift
+    ;;
+  #Handle args that should be sent to the linker
+  -Wl*)
+    xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
+    host_linker_args="$host_linker_args ${1:4:${#1}}"
+    ;;
+  #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
+  *.a|*.so|*.o|*.obj)
+    object_files="$object_files $1"
+    object_files_xlinker="$object_files_xlinker -Xlinker $1"
+    ;;
+  #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
+  *.dylib)
+    object_files="$object_files -Xlinker $1"
+    object_files_xlinker="$object_files_xlinker -Xlinker $1"
+    ;;
+  #Handle shared libraries with *.so.* names which nvcc can't do.
+  *.so.*)
+    shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
+    shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
+  ;;
+  #All other args are sent to the host compiler
+  *)
+    if [ $first_xcompiler_arg -eq 1 ]; then
+      xcompiler_args=$1
+      first_xcompiler_arg=0
+    else 
+      xcompiler_args="$xcompiler_args,$1"
+    fi
+    ;;
+  esac
+
+  shift
+done
+
+#Add default host compiler if necessary
+if [ $ccbin_set -ne 1 ]; then
+  cuda_args="$cuda_args -ccbin $host_compiler"
+fi
+
+#Add architecture command
+if [ $arch_set -ne 1 ]; then
+  cuda_args="$cuda_args -arch=$default_arch"
+fi
+
+#Compose compilation command
+nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
+if [ $first_xcompiler_arg -eq 0 ]; then
+  nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
+fi
+
+#Compose host only command
+host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
+
+#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
+if [ $replace_pragma_ident -eq 1 ]; then
+  cpp_files2=""
+  for file in $cpp_files; do
+    var=`grep pragma ${file} | grep ident | grep "#"`
+    if [ "${#var}" -gt 0 ]; then
+      #Flatten '/' to '_' so a source given with a directory part still maps to a file directly inside $temp_dir
+      tmp_file="$temp_dir/nvcc_wrapper_tmp_${file//\//_}"
+      sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $tmp_file
+      cpp_files2="$cpp_files2 $tmp_file"
+    else
+      cpp_files2="$cpp_files2 $file"
+    fi
+  done
+  cpp_files=$cpp_files2
+  #echo $cpp_files
+fi
+
+if [ "$cpp_files" ]; then
+  nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
+else
+  nvcc_command="$nvcc_command $object_files"
+fi
+
+if [ "$cpp_files" ]; then
+  host_command="$host_command $object_files $cpp_files"
+else
+  host_command="$host_command $object_files"
+fi
+
+#Print command for dryrun
+if [ $dry_run -eq 1 ]; then
+  if [ $host_only -eq 1 ]; then
+    echo $host_command
+  else
+    echo $nvcc_command
+  fi
+  exit 0
+fi
+
+#Run compilation command
+if [ $host_only -eq 1 ]; then
+  $host_command
+else
+  $nvcc_command
+fi
+error_code=$?
+
+#Report error code
+exit $error_code
diff --git a/lib/kokkos/cmake/deps/CUSPARSE.cmake b/lib/kokkos/cmake/deps/CUSPARSE.cmake
index 205f5e2a98898b8247b0f199afcc2e3ac4bc97b4..6f26d857c09acf7bb24c2c5449a54f5d507deae8 100644
--- a/lib/kokkos/cmake/deps/CUSPARSE.cmake
+++ b/lib/kokkos/cmake/deps/CUSPARSE.cmake
@@ -53,12 +53,12 @@
 # ************************************************************************
 # @HEADER
 
-include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
+#include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
 
-IF (TPL_ENABLE_CUDA)
-  GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
-  GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
-  GLOBAL_SET(TPL_CUSPARSE_LIBRARIES    ${CUDA_cusparse_LIBRARY})
-  TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
-ENDIF()
+#IF (TPL_ENABLE_CUDA)
+#  GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
+#  GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
+#  GLOBAL_SET(TPL_CUSPARSE_LIBRARIES    ${CUDA_cusparse_LIBRARY})
+#  TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
+#ENDIF()
 
diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake
index 34cd216f810c9a829dbcdc13ed5e9c3be81752ac..879d80172068db6a4afe62f9687dad9219859e2d 100644
--- a/lib/kokkos/cmake/tribits.cmake
+++ b/lib/kokkos/cmake/tribits.cmake
@@ -1,6 +1,16 @@
 INCLUDE(CMakeParseArguments)
 INCLUDE(CTest)
 
+cmake_policy(SET CMP0054 NEW)
+
+IF(NOT DEFINED ${PROJECT_NAME})
+  project(Kokkos) 
+ENDIF()
+
+IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG)
+  SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) # default only; a value already set by the user is kept
+ENDIF()
+
 FUNCTION(ASSERT_DEFINED VARS)
   FOREACH(VAR ${VARS})
     IF(NOT DEFINED ${VAR})
@@ -75,6 +85,13 @@ MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
 
 ENDMACRO()
 
+
+function(INCLUDE_DIRECTORIES)
+  cmake_parse_arguments(INCLUDE_DIRECTORIES "REQUIRED_DURING_INSTALLATION_TESTING" "" "" ${ARGN})
+  _INCLUDE_DIRECTORIES(${INCLUDE_DIRECTORIES_UNPARSED_ARGUMENTS})
+endfunction()
+
+
 MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT)
   SET(PROP_VALUES)
   FOREACH(TARGET_X ${ARGN})
@@ -271,6 +288,11 @@ ENDFUNCTION()
 
 ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})
 
+FUNCTION(TRIBITS_ADD_TEST)
+ENDFUNCTION()
+FUNCTION(TRIBITS_TPL_TENTATIVELY_ENABLE)
+ENDFUNCTION()
+
 FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME)
 
   SET(options STANDARD_PASS_OUTPUT WILL_FAIL)
diff --git a/lib/kokkos/config/configure_compton_cpu.sh b/lib/kokkos/config/configure_compton_cpu.sh
old mode 100755
new mode 100644
diff --git a/lib/kokkos/config/configure_compton_mic.sh b/lib/kokkos/config/configure_compton_mic.sh
old mode 100755
new mode 100644
diff --git a/lib/kokkos/config/configure_kokkos.sh b/lib/kokkos/config/configure_kokkos.sh
old mode 100755
new mode 100644
diff --git a/lib/kokkos/config/configure_kokkos_nvidia.sh b/lib/kokkos/config/configure_kokkos_nvidia.sh
old mode 100755
new mode 100644
diff --git a/lib/kokkos/config/configure_shannon.sh b/lib/kokkos/config/configure_shannon.sh
old mode 100755
new mode 100644
diff --git a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt
index 9f56f2fd48d30da63f28662431711c8b20d1f4a5..961e4186ec6e0fd24c3b71bddcbcbaa2873a41ca 100644
--- a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt
+++ b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt
@@ -91,9 +91,20 @@ Step 3:
 
 // -------------------------------------------------------------------------------- //
 
-Step 4:
-  4.1. Once all Trilinos tests pass promote Kokkos develop branch to master on Github
+Step 4: Once all Trilinos tests pass promote Kokkos develop branch to master on Github
+  4.1. Generate Changelog (You need a github API token)
+    
+       Close all Open issues with "InDevelop" tag on github
+   
+       (Not from kokkos directory)
+       github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG'
+       
+       (Copy the new section from the generated CHANGELOG.md to the kokkos/CHANGELOG.md)
+       (Make desired changes to CHANGELOG.md to enhance clarity)
+       (Commit and push the CHANGELOG to develop)
 
+  4.2 Merge develop into Master
+              
        - DO NOT fast-forward the merge!!!!
 
        (From kokkos directory):
@@ -103,7 +114,7 @@ Step 4:
        git reset --hard origin/master
        git merge --no-ff origin/develop
 
-  4.2. Update the tag in kokkos/config/master_history.txt
+  4.3. Update the tag in kokkos/config/master_history.txt
        Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
        Tag format: #.#.##
 
diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt
index f2eb674578f2c14442376210dfd1080050fe3917..78c512ccea4d5d1acf2c19c7157104c384be7a61 100644
--- a/lib/kokkos/config/master_history.txt
+++ b/lib/kokkos/config/master_history.txt
@@ -1,3 +1,6 @@
 tag:  2.01.00    date: 07:21:2016    master: xxxxxxxx    develop: fa6dfcc4
 tag:  2.01.06    date: 09:02:2016    master: 9afaa87f    develop: 555f1a3a
-
+tag:  2.01.10    date: 09:27:2016    master: e4119325    develop: e6cda11e
+tag:  2.02.00    date: 10:30:2016    master: 6c90a581    develop: ca3dd56e
+tag:  2.02.01    date: 11:01:2016    master: 9c698c86    develop: b0072304
+tag:  2.02.07    date: 12:16:2016    master: 4b4cc4ba    develop: 382c0966
diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper
index 6093cb61bdaf5a3f030406b8e149580b818920d0..cb206cf88b2c4e3a4f289bc919cc272e22749f36 100755
--- a/lib/kokkos/config/nvcc_wrapper
+++ b/lib/kokkos/config/nvcc_wrapper
@@ -121,6 +121,10 @@ do
   -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
     cuda_args="$cuda_args $1"
     ;;
+  #Handle more known nvcc args
+  --expt-extended-lambda|--expt-relaxed-constexpr)
+    cuda_args="$cuda_args $1"
+    ;;
   #Handle known nvcc args that have an argument
   -rdc|-maxrregcount|--default-stream)
     cuda_args="$cuda_args $1 $2"
diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia
index aac036a8f37abfedabac7a4849289ecb3cbdfcd0..21b8bbff657700b9a6439cc975ea8a68b2c1e8e2 100755
--- a/lib/kokkos/config/test_all_sandia
+++ b/lib/kokkos/config/test_all_sandia
@@ -16,6 +16,8 @@ elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
     MACHINE=bowman
 elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
     MACHINE=shepard
+elif [[ "$HOSTNAME" =~ apollo ]]; then
+    MACHINE=apollo
 elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
     MACHINE=sems
 else
@@ -28,6 +30,7 @@ IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
 INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
 CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
 CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
+CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"
 
 GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
 IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
@@ -44,20 +47,102 @@ BUILD_ONLY=False
 declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3
 TEST_SCRIPT=False
 SKIP_HWLOC=False
+SPOT_CHECK=False
 
-ARCH_FLAG=""
+PRINT_HELP=False
+OPT_FLAG=""
+KOKKOS_OPTIONS=""
+
+
+#
+# Handle arguments
+#
+
+while [[ $# -gt 0 ]]  # numeric test; '>' inside [[ ]] compares strings
+do
+key="$1"
+case $key in
+--kokkos-path*)
+KOKKOS_PATH="${key#*=}"
+;;
+--build-list*)
+CUSTOM_BUILD_LIST="${key#*=}"
+;;
+--debug*)
+DEBUG=True
+;;
+--build-only*)
+BUILD_ONLY=True
+;;
+--test-script*)
+TEST_SCRIPT=True
+;;
+--skip-hwloc*)
+SKIP_HWLOC=True
+;;
+--num*)
+NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
+;;
+--dry-run*)
+DRYRUN=True
+;;
+--spot-check*)
+SPOT_CHECK=True
+;;
+--arch*)
+ARCH_FLAG="--arch=${key#*=}"
+;;
+--opt-flag*)
+OPT_FLAG="${key#*=}"
+;;
+--with-cuda-options*)
+KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
+;;
+--help*)
+PRINT_HELP=True
+;;
+*)
+# unrecognized option or positional argument: collect it in ARGS
+ARGS="$ARGS $1"
+;;
+esac
+shift
+done
+
+SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
+
+# set kokkos path
+if [ -z "$KOKKOS_PATH" ]; then
+    KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
+else
+    # Ensure KOKKOS_PATH is abs path
+    KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
+fi
 
 #
 # Machine specific config
 #
 
 if [ "$MACHINE" = "sems" ]; then
-    source /projects/modulefiles/utils/sems-modules-init.sh
-    source /projects/modulefiles/utils/kokkos-modules-init.sh
+    source /projects/sems/modulefiles/utils/sems-modules-init.sh
+
+    BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
+    CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
+    CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
 
-    BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base"
-    CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base"
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG=""
+    fi 
 
+  if [ "$SPOT_CHECK" = "True" ]; then
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
+               "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
+               "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
+               "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  else
     # Format: (compiler module-list build-list exe-name warning-flag)
     COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
@@ -66,12 +151,15 @@ if [ "$MACHINE" = "sems" ]; then
                "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
                "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
                "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
                "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
     )
+  fi
 
 elif [ "$MACHINE" = "white" ]; then
     source /etc/profile.d/modules.sh
@@ -80,19 +168,20 @@ elif [ "$MACHINE" = "white" ]; then
 
     BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
     IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
-    CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.9.2"
+    CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0"
 
     # Don't do pthread on white
     GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
 
     # Format: (compiler module-list build-list exe-name warning-flag)
-    COMPILERS=("gcc/4.9.2 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "gcc/5.3.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+    COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
     )
-
-    ARCH_FLAG="--arch=Power8"
-    NUM_JOBS_TO_RUN_IN_PARALLEL=8
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG="--arch=Power8,Kepler37"
+    fi
+    NUM_JOBS_TO_RUN_IN_PARALLEL=2
 
 elif [ "$MACHINE" = "bowman" ]; then
     source /etc/profile.d/modules.sh
@@ -105,11 +194,13 @@ elif [ "$MACHINE" = "bowman" ]; then
 
     # Format: (compiler module-list build-list exe-name warning-flag)
     COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
     )
 
-    ARCH_FLAG="--arch=KNL"
-    NUM_JOBS_TO_RUN_IN_PARALLEL=8
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG="--arch=KNL"
+    fi
+    NUM_JOBS_TO_RUN_IN_PARALLEL=2
 
 elif [ "$MACHINE" = "shepard" ]; then
     source /etc/profile.d/modules.sh
@@ -122,58 +213,84 @@ elif [ "$MACHINE" = "shepard" ]; then
 
     # Format: (compiler module-list build-list exe-name warning-flag)
     COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
     )
 
-    ARCH_FLAG="--arch=HSW"
-    NUM_JOBS_TO_RUN_IN_PARALLEL=8
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG="--arch=HSW"
+    fi
+    NUM_JOBS_TO_RUN_IN_PARALLEL=2
+
+elif [ "$MACHINE" = "apollo" ]; then
+    source /projects/sems/modulefiles/utils/sems-modules-init.sh
+    module use /home/projects/modulefiles/local/x86-64
+    module load kokkos-env
+
+    module load sems-git
+    module load sems-tex
+    module load sems-cmake/3.5.2
+    module load sems-gdb
+
+    SKIP_HWLOC=True
+
+    BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
+    CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
+    CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
+
+    CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/8.0.44"
+    NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
+
+    BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
+    BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread"
+    BUILD_LIST_CLANG="Serial,Pthread,OpenMP"
 
+  if [ "$SPOT_CHECK" = "True" ]; then
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
+               "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
+               "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
+               "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
+               "clang/head $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  else
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "clang/head $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
+               "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
+               "gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  fi
+
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG="--arch=SNB,Kepler35"
+    fi
+    NUM_JOBS_TO_RUN_IN_PARALLEL=2
 else
     echo "Unhandled machine $MACHINE" >&2
     exit 1
 fi
 
+
+
 export OMP_NUM_THREADS=4
 
 declare -i NUM_RESULTS_TO_KEEP=7
 
 RESULT_ROOT_PREFIX=TestAll
 
-SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
-
-#
-# Handle arguments
-#
-
-while [[ $# > 0 ]]
-do
-key="$1"
-case $key in
---kokkos-path*)
-KOKKOS_PATH="${key#*=}"
-;;
---build-list*)
-CUSTOM_BUILD_LIST="${key#*=}"
-;;
---debug*)
-DEBUG=True
-;;
---build-only*)
-BUILD_ONLY=True
-;;
---test-script*)
-TEST_SCRIPT=True
-;;
---skip-hwloc*)
-SKIP_HWLOC=True
-;;
---num*)
-NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
-;;
---dry-run*)
-DRYRUN=True
-;;
---help)
+if [ "$PRINT_HELP" = "True" ]; then
 echo "test_all_sandia <ARGS> <OPTIONS>:"
 echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
 echo "    Defaults to root repo containing this script"
@@ -183,6 +300,9 @@ echo "--skip-hwloc: Do not do hwloc tests"
 echo "--num=N: Number of jobs to run in parallel "
 echo "--dry-run: Just print what would be executed"
 echo "--build-only: Just do builds, don't run anything"
+echo "--opt-flag=FLAG: Optimization flag (default: -O3)"
+echo "--arch=ARCHITECTURE: overwrite architecture flags"
+echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS"
 echo "--build-list=BUILD,BUILD,BUILD..."
 echo "    Provide a comma-separated list of builds instead of running all builds"
 echo "    Valid items:"
@@ -220,21 +340,6 @@ echo "  hit ctrl-z"
 echo "  % kill -9 %1"
 echo
 exit 0
-;;
-*)
-# args, just append
-ARGS="$ARGS $1"
-;;
-esac
-shift
-done
-
-# set kokkos path
-if [ -z "$KOKKOS_PATH" ]; then
-    KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
-else
-    # Ensure KOKKOS_PATH is abs path
-    KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
 fi
 
 # set build type
@@ -381,11 +486,15 @@ single_build_and_test() {
         local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
     fi
 
+    if [[ "$OPT_FLAG" = "" ]]; then
+      OPT_FLAG="-O3"
+    fi
+
     if [[ "$build_type" = *debug* ]]; then
         local extra_args="$extra_args --debug"
         local cxxflags="-g $compiler_warning_flags"
     else
-        local cxxflags="-O3 $compiler_warning_flags"
+        local cxxflags="$OPT_FLAG $compiler_warning_flags"
     fi
 
     if [[ "$compiler" == cuda* ]]; then
@@ -393,7 +502,9 @@ single_build_and_test() {
         export TMPDIR=$(pwd)
     fi
 
-    # cxxflags="-DKOKKOS_USING_EXP_VIEW=1 $cxxflags"
+    if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
+        local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
+    fi
 
     echo "  Starting job $desc"
 
@@ -440,13 +551,14 @@ run_in_background() {
     local compiler=$1
 
     local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
-    if [[ "$BUILD_ONLY" == True ]]; then
-        num_jobs=8
-    else
+    # don't override command line input
+    # if [[ "$BUILD_ONLY" == True ]]; then
+        # num_jobs=8
+    # else
         if [[ "$compiler" == cuda* ]]; then
             num_jobs=1
         fi
-    fi
+    # fi
     wait_for_jobs $num_jobs
 
     single_build_and_test $* &
diff --git a/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh b/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d2a7a533d5b34c044edc017605c85e93aaa13161
--- /dev/null
+++ b/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh
@@ -0,0 +1,50 @@
+#!/bin/bash -le
+# Prepares two Trilinos checkouts under ${PWD}: one updated with a Kokkos snapshot, one pristine.
+export TRILINOS_UPDATED_PATH=${PWD}/trilinos-update
+export TRILINOS_PRISTINE_PATH=${PWD}/trilinos-pristine
+
+#rm -rf ${KOKKOS_PATH}
+#rm -rf ${TRILINOS_UPDATED_PATH}
+#rm -rf ${TRILINOS_PRISTINE_PATH}
+
+# Clone each Trilinos checkout only if its directory does not exist yet.
+if [ ! -d "${TRILINOS_UPDATED_PATH}" ]; then
+  git clone https://github.com/trilinos/trilinos "${TRILINOS_UPDATED_PATH}"
+fi
+if [ ! -d "${TRILINOS_PRISTINE_PATH}" ]; then
+  git clone https://github.com/trilinos/trilinos "${TRILINOS_PRISTINE_PATH}"
+fi
+
+cd "${TRILINOS_UPDATED_PATH}"
+git checkout develop
+git reset --hard origin/develop
+git pull
+cd ..
+# KOKKOS_PATH is not set by this script; it must come from the caller's environment.
+python kokkos/config/snapshot.py "${KOKKOS_PATH}" "${TRILINOS_UPDATED_PATH}/packages"
+
+cd "${TRILINOS_UPDATED_PATH}"
+echo ""
+echo ""
+echo "Trilinos State:"
+git log --pretty=oneline --since=2.days
+SHA=$(git log --pretty=oneline --since=2.days | head -n 2 | tail -n 1 | awk '{print $1}')
+cd ..
+# SHA is the hash on the 2nd line of the 2-day log (the only line if there is just one).
+cd "${TRILINOS_PRISTINE_PATH}"
+git status
+git log --pretty=oneline --since=2.days
+echo "Checkout develop"
+git checkout develop
+echo "Pull"
+git pull
+echo "Checkout SHA"
+git checkout ${SHA}
+cd ..
+
+cd "${TRILINOS_PRISTINE_PATH}"
+echo ""
+echo ""
+echo "Trilinos Pristine State:"
+git log --pretty=oneline --since=2.days
+cd ..
diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt
index 726d403452bab92dfaab0a3275d9be42af6afa4f..403ac746f6d6109a40d117b549235eae76965119 100644
--- a/lib/kokkos/containers/performance_tests/CMakeLists.txt
+++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt
@@ -1,6 +1,6 @@
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
 
 SET(SOURCES
diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile
index e7abaf44ce07fb725bb1947d86b573ac6a15dae4..fa3bc777013fd5148a2a49c26c00df4aba9786e7 100644
--- a/lib/kokkos/containers/performance_tests/Makefile
+++ b/lib/kokkos/containers/performance_tests/Makefile
@@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests
 default: build_all
 	echo "End Build"
 
-
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+  CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
 else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?= -lpthread
+  CXX = g++
 endif
 
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?= -lpthread
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests
 
 TEST_TARGETS = 
diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp
index 8183adaa60b8226fdd5979253cc619ff90e701ba..e7afad905bff4a83859e005944f0904a9edc0699 100644
--- a/lib/kokkos/containers/performance_tests/TestCuda.cpp
+++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp
@@ -83,7 +83,7 @@ TEST_F( cuda, dynrankview_perf )
 {
   std::cout << "Cuda" << std::endl;
   std::cout << " DynRankView vs View: Initialization Only " << std::endl;
-  test_dynrankview_op_perf<Kokkos::Cuda>( 4096 );
+  test_dynrankview_op_perf<Kokkos::Cuda>( 40960 );
 }
 
 TEST_F( cuda, global_2_local)
diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp
index aab6e6988fc847360f02474daab52110a18ef8ef..d96a3f74324046862b4740c4d9c3ae7a178937d8 100644
--- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp
+++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp
@@ -180,8 +180,8 @@ void test_dynrankview_op_perf( const int par_size )
 
   typedef DeviceType execution_space;
   typedef typename execution_space::size_type size_type;
-  const size_type dim2 = 900;
-  const size_type dim3 = 300;
+  const size_type dim2 = 90;
+  const size_type dim3 = 30;
 
   double elapsed_time_view = 0;
   double elapsed_time_compview = 0;
diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp
index 1230df4d97741123f2be0a011fd8fd7a40fbd35f..3a0196ee4c5ea48fcd9e1895212f655c6b81e6a1 100644
--- a/lib/kokkos/containers/src/Kokkos_DualView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp
@@ -261,9 +261,6 @@ public:
     modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
     modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
   {
-#if ! KOKKOS_USING_EXP_VIEW
-    Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ());
-#else
     if ( int(d_view.rank)     != int(h_view.rank) ||
          d_view.dimension_0() != h_view.dimension_0() ||
          d_view.dimension_1() != h_view.dimension_1() ||
@@ -284,7 +281,6 @@ public:
          d_view.span()        != h_view.span() ) {
       Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views");
     }
-#endif
   }
 
   //@}
@@ -315,13 +311,13 @@ public:
   template< class Device >
   KOKKOS_INLINE_FUNCTION
   const typename Impl::if_c<
-    Impl::is_same<typename t_dev::memory_space,
+    std::is_same<typename t_dev::memory_space,
                           typename Device::memory_space>::value,
     t_dev,
     t_host>::type& view () const
   {
     return Impl::if_c<
-      Impl::is_same<
+      std::is_same<
         typename t_dev::memory_space,
         typename Device::memory_space>::value,
       t_dev,
@@ -347,13 +343,13 @@ public:
   ///   appropriate template parameter.
   template<class Device>
   void sync( const typename Impl::enable_if<
-        ( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
-        ( Impl::is_same< Device , int>::value)
+        ( std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
+        ( std::is_same< Device , int>::value)
         , int >::type& = 0)
   {
     const unsigned int dev =
       Impl::if_c<
-        Impl::is_same<
+        std::is_same<
           typename t_dev::memory_space,
           typename Device::memory_space>::value ,
         unsigned int,
@@ -370,7 +366,7 @@ public:
         modified_host() = modified_device() = 0;
       }
     }
-    if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
+    if(std::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
       t_dev::execution_space::fence();
       t_host::execution_space::fence();
     }
@@ -378,13 +374,13 @@ public:
 
   template<class Device>
   void sync ( const typename Impl::enable_if<
-      ( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
-      ( Impl::is_same< Device , int>::value)
+      ( ! std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
+      ( std::is_same< Device , int>::value)
       , int >::type& = 0 )
   {
     const unsigned int dev =
       Impl::if_c<
-        Impl::is_same<
+        std::is_same<
           typename t_dev::memory_space,
           typename Device::memory_space>::value,
         unsigned int,
@@ -405,7 +401,7 @@ public:
   {
     const unsigned int dev =
       Impl::if_c<
-        Impl::is_same<
+        std::is_same<
           typename t_dev::memory_space,
           typename Device::memory_space>::value ,
         unsigned int,
@@ -431,7 +427,7 @@ public:
   void modify () {
     const unsigned int dev =
       Impl::if_c<
-        Impl::is_same<
+        std::is_same<
           typename t_dev::memory_space,
           typename Device::memory_space>::value,
         unsigned int,
@@ -514,11 +510,7 @@ public:
 
   //! The allocation size (same as Kokkos::View::capacity).
   size_t capacity() const {
-#if KOKKOS_USING_EXP_VIEW
     return d_view.span();
-#else
-    return d_view.capacity();
-#endif
   }
 
   //! Get stride(s) for each dimension.
@@ -555,8 +547,6 @@ public:
 // Partial specializations of Kokkos::subview() for DualView objects.
 //
 
-#if KOKKOS_USING_EXP_VIEW
-
 namespace Kokkos {
 namespace Impl {
 
@@ -590,352 +580,6 @@ subview( const DualView<D,A1,A2,A3> & src , Args ... args )
 
 } /* namespace Kokkos */
 
-#else
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-//
-// Partial specializations of Kokkos::subview() for DualView objects.
-//
-
-namespace Kokkos {
-namespace Impl {
-
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
-        , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
-        >
-struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type  >
-                  , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
-                  , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
-{
-private:
-
-  typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >  SrcViewType ;
-
-  enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
-  enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
-  enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
-  enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
-  enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
-  enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
-  enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
-  enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
-
-  // The source view rank must be equal to the input argument rank
-  // Once a void argument is encountered all subsequent arguments must be void.
-  enum { InputRank =
-    Impl::StaticAssert<( SrcViewType::rank ==
-                         ( V0 ? 0 : (
-                           V1 ? 1 : (
-                           V2 ? 2 : (
-                           V3 ? 3 : (
-                           V4 ? 4 : (
-                           V5 ? 5 : (
-                           V6 ? 6 : (
-                           V7 ? 7 : 8 ))))))) ))
-                       &&
-                       ( SrcViewType::rank ==
-                         ( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
-    >::value ? SrcViewType::rank : 0 };
-
-  enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
-  enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
-  enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
-  enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
-  enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
-  enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
-  enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
-  enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
-
-  enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
-                    + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
-
-  // Reverse
-  enum { R0_rev = 0 == InputRank ? 0u : (
-                  1 == InputRank ? unsigned(R0) : (
-                  2 == InputRank ? unsigned(R1) : (
-                  3 == InputRank ? unsigned(R2) : (
-                  4 == InputRank ? unsigned(R3) : (
-                  5 == InputRank ? unsigned(R4) : (
-                  6 == InputRank ? unsigned(R5) : (
-                  7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
-
-  typedef typename SrcViewType::array_layout  SrcViewLayout ;
-
-  // Choose array layout, attempting to preserve original layout if at all possible.
-  typedef typename Impl::if_c<
-     ( // Same Layout IF
-       // OutputRank 0
-       ( OutputRank == 0 )
-       ||
-       // OutputRank 1 or 2, InputLayout Left, Interval 0
-       // because single stride one or second index has a stride.
-       ( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
-       ||
-       // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
-       // because single stride one or second index has a stride.
-       ( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
-     ), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
-
-  // Choose data type as a purely dynamic rank array to accomodate a runtime range.
-  typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
-          typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
-          typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
-          typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
-          typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
-          typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
-          typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
-          typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
-                                                 typename SrcViewType::value_type ********
-  >::type >::type >::type >::type >::type >::type >::type >::type  OutputData ;
-
-  // Choose space.
-  // If the source view's template arg1 or arg2 is a space then use it,
-  // otherwise use the source view's execution space.
-
-  typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
-          typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
-  >::type >::type OutputSpace ;
-
-public:
-
-  // If keeping the layout then match non-data type arguments
-  // else keep execution space and memory traits.
-  typedef typename
-    Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
-              , Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type >
-              , Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace
-                            , typename SrcViewType::memory_traits >
-              >::type  type ;
-};
-
-} /* namespace Impl */
-} /* namespace Kokkos */
-
-namespace Kokkos {
-
-template< class D , class A1 , class A2 , class A3 ,
-          class ArgType0 >
-typename Impl::ViewSubview< DualView<D,A1,A2,A3>
-                          , ArgType0 , void , void , void
-                          , void , void , void , void
-                          >::type
-subview( const DualView<D,A1,A2,A3> & src ,
-         const ArgType0 & arg0 )
-{
-  typedef typename
-    Impl::ViewSubview< DualView<D,A1,A2,A3>
-                 , ArgType0 , void , void , void
-                 , void , void , void , void
-                 >::type
-      DstViewType ;
-  DstViewType sub_view;
-  sub_view.d_view = subview(src.d_view,arg0);
-  sub_view.h_view = subview(src.h_view,arg0);
-  sub_view.modified_device = src.modified_device;
-  sub_view.modified_host = src.modified_host;
-  return sub_view;
-}
-
-
-template< class D , class A1 , class A2 , class A3 ,
-          class ArgType0 , class ArgType1 >
-typename Impl::ViewSubview< DualView<D,A1,A2,A3>
-                          , ArgType0 , ArgType1 , void , void
-                          , void , void , void , void
-                          >::type
-subview( const DualView<D,A1,A2,A3> & src ,
-         const ArgType0 & arg0 ,
-         const ArgType1 & arg1 )
-{
-  typedef typename
-    Impl::ViewSubview< DualView<D,A1,A2,A3>
-                 , ArgType0 , ArgType1 , void , void
-                 , void , void , void , void
-                 >::type
-      DstViewType ;
-  DstViewType sub_view;
-  sub_view.d_view = subview(src.d_view,arg0,arg1);
-  sub_view.h_view = subview(src.h_view,arg0,arg1);
-  sub_view.modified_device = src.modified_device;
-  sub_view.modified_host = src.modified_host;
-  return sub_view;
-}
-
-template< class D , class A1 , class A2 , class A3 ,
-          class ArgType0 , class ArgType1 , class ArgType2 >
-typename Impl::ViewSubview< DualView<D,A1,A2,A3>
-                          , ArgType0 , ArgType1 , ArgType2 , void
-                          , void , void , void , void
-                          >::type
-subview( const DualView<D,A1,A2,A3> & src ,
-         const ArgType0 & arg0 ,
-         const ArgType1 & arg1 ,
-         const ArgType2 & arg2 )
-{
-  typedef typename
-    Impl::ViewSubview< DualView<D,A1,A2,A3>
-                 , ArgType0 , ArgType1 , ArgType2 , void
-                 , void , void , void , void
-                 >::type
-      DstViewType ;
-  DstViewType sub_view;
-  sub_view.d_view = subview(src.d_view,arg0,arg1,arg2);
-  sub_view.h_view = subview(src.h_view,arg0,arg1,arg2);
-  sub_view.modified_device = src.modified_device;
-  sub_view.modified_host = src.modified_host;
-  return sub_view;
-}
-
-template< class D , class A1 , class A2 , class A3 ,
-          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
-typename Impl::ViewSubview< DualView<D,A1,A2,A3>
-                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                          , void , void , void , void
-                          >::type
-subview( const DualView<D,A1,A2,A3> & src ,
-         const ArgType0 & arg0 ,
-         const ArgType1 & arg1 ,
-         const ArgType2 & arg2 ,
-         const ArgType3 & arg3 )
-{
-  typedef typename
-    Impl::ViewSubview< DualView<D,A1,A2,A3>
-                 , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                 , void , void , void , void
-                 >::type
-      DstViewType ;
-  DstViewType sub_view;
-  sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3);
-  sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3);
-  sub_view.modified_device = src.modified_device;
-  sub_view.modified_host = src.modified_host;
-  return sub_view;
-}
-
-template< class D , class A1 , class A2 , class A3 ,
-          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
-          class ArgType4 >
-typename Impl::ViewSubview< DualView<D,A1,A2,A3>
-                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                          , ArgType4 , void , void , void
-                          >::type
-subview( const DualView<D,A1,A2,A3> & src ,
-         const ArgType0 & arg0 ,
-         const ArgType1 & arg1 ,
-         const ArgType2 & arg2 ,
-         const ArgType3 & arg3 ,
-         const ArgType4 & arg4 )
-{
-  typedef typename
-    Impl::ViewSubview< DualView<D,A1,A2,A3>
-                 , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                 , ArgType4 , void , void ,void
-                 >::type
-      DstViewType ;
-  DstViewType sub_view;
-  sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4);
-  sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4);
-  sub_view.modified_device = src.modified_device;
-  sub_view.modified_host = src.modified_host;
-  return sub_view;
-}
-
-template< class D , class A1 , class A2 , class A3 ,
-          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
-          class ArgType4 , class ArgType5 >
-typename Impl::ViewSubview< DualView<D,A1,A2,A3>
-                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                          , ArgType4 , ArgType5 , void , void
-                          >::type
-subview( const DualView<D,A1,A2,A3> & src ,
-         const ArgType0 & arg0 ,
-         const ArgType1 & arg1 ,
-         const ArgType2 & arg2 ,
-         const ArgType3 & arg3 ,
-         const ArgType4 & arg4 ,
-         const ArgType5 & arg5 )
-{
-  typedef typename
-    Impl::ViewSubview< DualView<D,A1,A2,A3>
-                 , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                 , ArgType4 , ArgType5 , void , void
-                 >::type
-      DstViewType ;
-  DstViewType sub_view;
-  sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5);
-  sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5);
-  sub_view.modified_device = src.modified_device;
-  sub_view.modified_host = src.modified_host;
-  return sub_view;
-}
-
-template< class D , class A1 , class A2 , class A3 ,
-          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
-          class ArgType4 , class ArgType5 , class ArgType6 >
-typename Impl::ViewSubview< DualView<D,A1,A2,A3>
-                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                          , ArgType4 , ArgType5 , ArgType6 , void
-                          >::type
-subview( const DualView<D,A1,A2,A3> & src ,
-         const ArgType0 & arg0 ,
-         const ArgType1 & arg1 ,
-         const ArgType2 & arg2 ,
-         const ArgType3 & arg3 ,
-         const ArgType4 & arg4 ,
-         const ArgType5 & arg5 ,
-         const ArgType6 & arg6 )
-{
-  typedef typename
-    Impl::ViewSubview< DualView<D,A1,A2,A3>
-                 , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                 , ArgType4 , ArgType5 , ArgType6 , void
-                 >::type
-      DstViewType ;
-  DstViewType sub_view;
-  sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
-  sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
-  sub_view.modified_device = src.modified_device;
-  sub_view.modified_host = src.modified_host;
-  return sub_view;
-}
-
-template< class D , class A1 , class A2 , class A3 ,
-          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
-          class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
-typename Impl::ViewSubview< DualView<D,A1,A2,A3>
-                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                          , ArgType4 , ArgType5 , ArgType6 , ArgType7
-                          >::type
-subview( const DualView<D,A1,A2,A3> & src ,
-         const ArgType0 & arg0 ,
-         const ArgType1 & arg1 ,
-         const ArgType2 & arg2 ,
-         const ArgType3 & arg3 ,
-         const ArgType4 & arg4 ,
-         const ArgType5 & arg5 ,
-         const ArgType6 & arg6 ,
-         const ArgType7 & arg7 )
-{
-  typedef typename
-    Impl::ViewSubview< DualView<D,A1,A2,A3>
-                 , ArgType0 , ArgType1 , ArgType2 , ArgType3
-                 , ArgType4 , ArgType5 , ArgType6 , ArgType7
-                 >::type
-      DstViewType ;
-  DstViewType sub_view;
-  sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
-  sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
-  sub_view.modified_device = src.modified_device;
-  sub_view.modified_host = src.modified_host;
-  return sub_view;
-}
-
-} // namespace Kokkos
-
-#endif /* KOKKOS_USING_EXP_VIEW */
-
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
index f72277700ad87cd0fe9cb1cdee4c2d34ff69ab80..1ac92b9d17c75cd032620e77bd324274a6746cb9 100644
--- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
@@ -223,14 +223,85 @@ struct DynRankDimTraits {
                  );
   }
 
-  template < typename DynRankViewType , typename iType >
-  void verify_dynrankview_rank ( iType N , const DynRankViewType &drv )
-  {
-    if ( static_cast<iType>(drv.rank()) > N )
-       {
-         Kokkos::abort( "Need at least rank arguments to the operator()" ); 
-       }
+
+/** \brief  Debug bounds-checking routines */
+// Enhanced debug checking - most infrastructure matches that of the functions in
+// Kokkos_ViewMapping; additionally checks that extra arguments beyond the rank are 0
+template< unsigned ,  typename iType0 , class MapType >
+KOKKOS_INLINE_FUNCTION
+bool dyn_rank_view_verify_operator_bounds( const iType0 & , const MapType & )
+{ return true ; }
+
+template< unsigned R , typename iType0 ,  class MapType , typename iType1 , class ... Args >
+KOKKOS_INLINE_FUNCTION
+bool dyn_rank_view_verify_operator_bounds // checks index i against dimension R, then recurses on args with R+1
+  ( const iType0  & rank 
+  , const MapType & map
+  , const iType1  & i
+  , Args ... args
+  )
+{
+  if ( static_cast<iType0>(R) < rank ) { // R is a real dimension: i (as size_t) must be below extent(R)
+    return ( size_t(i) < map.extent(R) )
+       && dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... );
+  }
+  else if ( i != 0 ) { // R is at or beyond the rank: only a zero index is accepted
+    printf("DynRankView Debug Bounds Checking Error: at rank %u\n  Extra arguments beyond the rank must be zero \n",R);
+    return ( false ) // '&&' short-circuits here, so the remaining arguments are not examined
+       && dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... ); 
+  }
+  else { // extra argument is zero: keep checking the remaining arguments
+    return ( true )
+       && dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... );
+  }
+}
+
+template< unsigned , class MapType >
+inline
+void dyn_rank_view_error_operator_bounds( char * , int , const MapType & )
+{}
+
+template< unsigned R , class MapType , class iType , class ... Args >
+inline
+void dyn_rank_view_error_operator_bounds
+  ( char * buf , int len , const MapType & map , const iType & i , Args ... args )
+{
+  // Appends " <index> < <extent> ," for dimension R to buf, then recurses on the
+  // remaining indices; the final index is followed by ')' instead of ','.
+  // Stop once no space is left: a non-positive len would convert to a huge
+  // size_t inside snprintf and let it write past the end of the buffer.
+  if ( len <= 0 ) return ;
+  const int n =
+    snprintf( buf , len , " %lu < %lu %c" // %lu matches the unsigned long arguments
+            , static_cast<unsigned long>(i)
+            , static_cast<unsigned long>( map.extent(R) )
+            , ( sizeof...(Args) ? ',' : ')' ) );
+  if ( n < 0 || len <= n ) return ; // output error or truncated: do not advance past the buffer
+  dyn_rank_view_error_operator_bounds<R+1>(buf+n,len-n,map,args...);
+}
+
+// op_rank = rank of the operator version that was called
+template< typename iType0 , typename iType1 ,  class MapType , class ... Args >
+KOKKOS_INLINE_FUNCTION
+void dyn_rank_view_verify_operator_bounds
+  ( const iType0 & op_rank , const iType1 & rank , const char* label , const MapType & map , Args ... args )
+{
+  if ( static_cast<iType0>(rank) > op_rank ) {
+    Kokkos::abort( "DynRankView Bounds Checking Error: Need at least rank arguments to the operator()" ); 
+  }
+
+  if ( ! dyn_rank_view_verify_operator_bounds<0>( rank , map , args ... ) ) {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    enum { LEN = 1024 };
+    char buffer[ LEN ];
+    int n = snprintf(buffer,LEN,"DynRankView bounds error of view %s (", label);
+    dyn_rank_view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... );
+    Kokkos::Impl::throw_runtime_exception(std::string(buffer));
+#else
+    Kokkos::abort("DynRankView bounds error");
+#endif
+  }
+}
 
 
 /** \brief  Assign compatible default mappings */
@@ -341,7 +412,6 @@ class DynRankView : public ViewTraits< DataType , Properties ... >
 
 private: 
   template < class , class ... > friend class DynRankView ;
-//  template < class , class ... > friend class Kokkos::Experimental::View ; //unnecessary now...
   template < class , class ... > friend class Impl::ViewMapping ;
 
 public: 
@@ -504,20 +574,26 @@ private:
       ( is_layout_left || is_layout_right || is_layout_stride )
   };
 
+  template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space
+    { KOKKOS_FORCEINLINE_FUNCTION static void check() {} };
+
+  template< class Space > struct verify_space<Space,false>
+    { KOKKOS_FORCEINLINE_FUNCTION static void check()
+        { Kokkos::abort("Kokkos::DynRankView ERROR: attempt to access inaccessible memory space"); };
+    };
+
 // Bounds checking macros
 #if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
 
-#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \
-  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \
-    < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \
-  Kokkos::Experimental::Impl::verify_dynrankview_rank ( N , *this ) ; \
-  Kokkos::Experimental::Impl::view_verify_operator_bounds ARG ; 
+// rank of the calling operator - included as first argument in ARG
+#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
+  DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
+  Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ; 
 
 #else
 
-#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \
-  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \
-    < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify();
+#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
+  DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
 
 #endif
 
@@ -532,7 +608,11 @@ public:
   KOKKOS_INLINE_FUNCTION
   reference_type operator()() const
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 0 , ( implementation_map() ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() ,  NULL , m_map) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
+      #endif
       return implementation_map().reference();
       //return m_map.reference(0,0,0,0,0,0,0); 
     }
@@ -563,12 +643,17 @@ public:
       return rankone_view(i0);
     }
 
+  // Rank 1 parenthesis
   template< typename iType >
   KOKKOS_INLINE_FUNCTION
   typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type
   operator()(const iType & i0 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 1 , ( m_map , i0 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
+      #endif
       return m_map.reference(i0); 
     }
 
@@ -577,6 +662,11 @@ public:
   typename std::enable_if< !(std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type
   operator()(const iType & i0 ) const
     {
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
+      #endif
       return m_map.reference(i0,0,0,0,0,0,0);
     }
 
@@ -586,7 +676,11 @@ public:
   typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value  && std::is_integral<iType1>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
+      #endif
       return m_map.reference(i0,i1); 
     }
 
@@ -595,7 +689,11 @@ public:
   typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
+      #endif
       return m_map.reference(i0,i1,0,0,0,0,0); 
     }
 
@@ -605,7 +703,11 @@ public:
   typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value  && std::is_integral<iType1>::value && std::is_integral<iType2>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
+      #endif
       return m_map.reference(i0,i1,i2); 
     }
 
@@ -614,7 +716,11 @@ public:
   typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
+      #endif
       return m_map.reference(i0,i1,i2,0,0,0,0); 
     }
 
@@ -624,7 +730,11 @@ public:
   typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value  && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
+      #endif
       return m_map.reference(i0,i1,i2,i3); 
     }
 
@@ -633,7 +743,11 @@ public:
   typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
+      #endif
       return m_map.reference(i0,i1,i2,i3,0,0,0); 
     }
 
@@ -643,7 +757,11 @@ public:
   typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value  && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
+      #endif
       return m_map.reference(i0,i1,i2,i3,i4); 
     }
 
@@ -652,7 +770,11 @@ public:
   typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
+      #endif
       return m_map.reference(i0,i1,i2,i3,i4,0,0); 
     }
 
@@ -662,7 +784,11 @@ public:
   typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value  && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
+      #endif
       return m_map.reference(i0,i1,i2,i3,i4,i5); 
     }
 
@@ -671,7 +797,11 @@ public:
   typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
+      #endif
       return m_map.reference(i0,i1,i2,i3,i4,i5,0); 
     }
 
@@ -681,7 +811,11 @@ public:
   typename std::enable_if< (std::is_integral<iType0>::value  && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value && std::is_integral<iType6>::value), reference_type>::type
   operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const 
     { 
-      KOKKOS_VIEW_OPERATOR_VERIFY( 7 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 , i6 ) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
+      #endif
       return m_map.reference(i0,i1,i2,i3,i4,i5,i6); 
     }
 
@@ -1136,13 +1270,13 @@ private:
 
 public:
 
-  typedef Kokkos::Experimental::ViewTraits
+  typedef Kokkos::ViewTraits
     < data_type
     , array_layout 
     , typename SrcTraits::device_type
     , typename SrcTraits::memory_traits > traits_type ;
 
-  typedef Kokkos::Experimental::View
+  typedef Kokkos::View
     < data_type
     , array_layout 
     , typename SrcTraits::device_type
@@ -1154,13 +1288,13 @@ public:
 
     static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" );
 
-    typedef Kokkos::Experimental::ViewTraits
+    typedef Kokkos::ViewTraits
       < data_type 
       , array_layout
       , typename SrcTraits::device_type
       , MemoryTraits > traits_type ;
 
-    typedef Kokkos::Experimental::View
+    typedef Kokkos::View
       < data_type 
       , array_layout
       , typename SrcTraits::device_type
@@ -1264,7 +1398,7 @@ subdynrankview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args.
     if ( src.rank() > sizeof...(Args) ) //allow sizeof...(Args) >= src.rank(), ignore the remaining args
       { Kokkos::abort("subdynrankview: num of args must be >= rank of the source DynRankView"); }
   
-    typedef Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::Experimental::ViewTraits< D*******, P... > , Args... > metafcn ;
+    typedef Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ;
 
     return metafcn::subview( src.rank() , src , args... );
   }
@@ -1502,10 +1636,10 @@ void deep_copy
   typedef typename src_type::memory_space     src_memory_space ;
 
   enum { DstExecCanAccessSrc =
-   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
+   Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible };
 
   enum { SrcExecCanAccessDst =
-   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value };
+   Kokkos::Impl::SpaceAccessibility< src_execution_space , dst_memory_space >::accessible };
 
   if ( (void *) dst.data() != (void*) src.data() ) {
 
@@ -1666,7 +1800,7 @@ inline
 typename DynRankView<T,P...>::HostMirror
 create_mirror( const DynRankView<T,P...> & src
              , typename std::enable_if<
-                 ! std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout
+                 ! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
                                , Kokkos::LayoutStride >::value
                >::type * = 0
              )
@@ -1684,7 +1818,7 @@ inline
 typename DynRankView<T,P...>::HostMirror
 create_mirror( const DynRankView<T,P...> & src
              , typename std::enable_if<
-                 std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout
+                 std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
                              , Kokkos::LayoutStride >::value
                >::type * = 0
              )
@@ -1779,7 +1913,7 @@ void resize( DynRankView<T,P...> & v ,
 {
   typedef DynRankView<T,P...>  drview_type ;
 
-  static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
+  static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
 
   drview_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6 );
 
@@ -1803,7 +1937,7 @@ void realloc( DynRankView<T,P...> & v ,
 {
   typedef DynRankView<T,P...>  drview_type ;
 
-  static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" );
+  static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" );
 
   const std::string label = v.label();
 
diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp
index fb364f0bf252e2ccae8aa04544487bc8f3f1a74f..3277c007d0845485a57ed7aabfa35202f1b22d1b 100644
--- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp
@@ -56,7 +56,7 @@ namespace Experimental {
  *         Subviews are not allowed.
  */
 template< typename DataType , typename ... P >
-class DynamicView : public Kokkos::Experimental::ViewTraits< DataType , P ... >
+class DynamicView : public Kokkos::ViewTraits< DataType , P ... >
 { 
 public:
 
@@ -75,6 +75,15 @@ private:
                  std::is_same< typename traits::specialize , void >::value
                , "DynamicView must have trivial data type" );
 
+
+  template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space
+    { KOKKOS_FORCEINLINE_FUNCTION static void check() {} };
+
+  template< class Space > struct verify_space<Space,false>
+    { KOKKOS_FORCEINLINE_FUNCTION static void check()
+        { Kokkos::abort("Kokkos::DynamicView ERROR: attempt to access inaccessible memory space"); };
+    };
+
 public:
 
   typedef Kokkos::Experimental::MemoryPool< typename traits::device_type > memory_pool ;
@@ -117,10 +126,10 @@ public:
   KOKKOS_INLINE_FUNCTION constexpr size_t size() const
     {
       return
-        Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
+        Kokkos::Impl::MemorySpaceAccess
           < Kokkos::Impl::ActiveExecutionMemorySpace
           , typename traits::memory_space
-          >::value 
+          >::accessible 
         ? // Runtime size is at the end of the chunk pointer array
           (*reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max ))
           << m_chunk_shift
@@ -179,10 +188,7 @@ public:
       static_assert( Kokkos::Impl::are_integral<I0,Args...>::value
                    , "Indices must be integral type" );
 
-      Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
-        < Kokkos::Impl::ActiveExecutionMemorySpace
-        , typename traits::memory_space
-        >::verify();
+      DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
 
       // Which chunk is being indexed.
       const uintptr_t ic = uintptr_t( i0 >> m_chunk_shift );
@@ -223,15 +229,13 @@ public:
     {
       typedef typename traits::value_type value_type ;
 
-      Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
-        < Kokkos::Impl::ActiveExecutionMemorySpace
-        , typename traits::memory_space >::verify();
+      DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
 
       const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ;
 
       if ( m_chunk_max < NC ) {
 #if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
-        printf("DynamicView::resize_parallel(%lu) m_chunk_max(%lu) NC(%lu)\n"
+        printf("DynamicView::resize_parallel(%lu) m_chunk_max(%u) NC(%lu)\n"
               , n , m_chunk_max , NC );
 #endif
         Kokkos::abort("DynamicView::resize_parallel exceeded maximum size");
@@ -269,9 +273,7 @@ public:
   inline
   void resize_serial( size_t n )
     {
-      Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
-        < Kokkos::Impl::ActiveExecutionMemorySpace
-        , typename traits::memory_space >::verify();
+      DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
 
       const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ;
 
@@ -398,9 +400,7 @@ public:
     , m_chunk_mask( ( 1 << m_chunk_shift ) - 1 )
     , m_chunk_max( ( arg_size_max + m_chunk_mask ) >> m_chunk_shift )
     {
-      Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
-        < Kokkos::Impl::ActiveExecutionMemorySpace
-        , typename traits::memory_space >::verify();
+      DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
 
       // A functor to deallocate all of the chunks upon final destruction
 
@@ -452,7 +452,7 @@ void deep_copy( const View<T,DP...> & dst
   typedef typename ViewTraits<T,SP...>::memory_space     src_memory_space ;
 
   enum { DstExecCanAccessSrc =
-   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
+   Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible };
 
   if ( DstExecCanAccessSrc ) {
     // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
@@ -476,7 +476,7 @@ void deep_copy( const DynamicView<T,DP...> & dst
   typedef typename ViewTraits<T,SP...>::memory_space     src_memory_space ;
 
   enum { DstExecCanAccessSrc =
-   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
+   Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible };
 
   if ( DstExecCanAccessSrc ) {
     // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
diff --git a/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp b/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c90e4c238654b5458f24db2083eb85e815b683c
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp
@@ -0,0 +1,196 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP
+#define KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP
+
+#include <vector>
+#include <Kokkos_Core.hpp>
+#include <Kokkos_View.hpp>
+#include <Kokkos_DualView.hpp>
+
+namespace Kokkos {
+namespace Experimental {
+
+template <typename ReportType, typename DeviceType>
+class ErrorReporter
+{
+public:
+
+  typedef ReportType                                      report_type;
+  typedef DeviceType                                      device_type;
+  typedef typename device_type::execution_space           execution_space;
+
+  ErrorReporter(int max_results)
+    : m_numReportsAttempted(""),
+      m_reports("", max_results),
+      m_reporters("", max_results)
+  {
+    clear();
+  }
+
+  int getCapacity() const { return m_reports.h_view.dimension_0(); }
+
+  int getNumReports();
+
+  int getNumReportAttempts();
+
+  void getReports(std::vector<int> &reporters_out, std::vector<report_type> &reports_out);
+  void getReports( typename Kokkos::View<int*, typename DeviceType::execution_space >::HostMirror &reporters_out,
+                   typename Kokkos::View<report_type*, typename DeviceType::execution_space >::HostMirror &reports_out);
+
+  void clear();
+
+  void resize(const size_t new_size);
+
+  bool full() {return (getNumReportAttempts() >= getCapacity()); }
+
+  KOKKOS_INLINE_FUNCTION
+  bool add_report(int reporter_id, report_type report) const
+  {
+    int idx = Kokkos::atomic_fetch_add(&m_numReportsAttempted(), 1);
+
+    if (idx >= 0 && (idx < static_cast<int>(m_reports.d_view.dimension_0()))) {
+      m_reporters.d_view(idx) = reporter_id;
+      m_reports.d_view(idx)   = report;
+      return true;
+    }
+    else {
+      return false;
+    }
+  }
+
+private:
+
+  typedef Kokkos::View<report_type *, execution_space>        reports_view_t;
+  typedef Kokkos::DualView<report_type *, execution_space>    reports_dualview_t;
+
+  typedef typename reports_dualview_t::host_mirror_space  host_mirror_space;
+  Kokkos::View<int, execution_space>   m_numReportsAttempted;
+  reports_dualview_t                   m_reports;
+  Kokkos::DualView<int *, execution_space> m_reporters;
+
+};
+
+
+template <typename ReportType, typename DeviceType>
+inline int ErrorReporter<ReportType, DeviceType>::getNumReports() 
+{
+  int num_reports = 0;
+  Kokkos::deep_copy(num_reports,m_numReportsAttempted);
+  if (num_reports > static_cast<int>(m_reports.h_view.dimension_0())) {
+    num_reports = m_reports.h_view.dimension_0();
+  }
+  return num_reports;
+}
+
+template <typename ReportType, typename DeviceType>
+inline int ErrorReporter<ReportType, DeviceType>::getNumReportAttempts()
+{
+  int num_reports = 0;
+  Kokkos::deep_copy(num_reports,m_numReportsAttempted);
+  return num_reports;
+}
+
+template <typename ReportType, typename DeviceType>
+void ErrorReporter<ReportType, DeviceType>::getReports(std::vector<int> &reporters_out, std::vector<report_type> &reports_out)
+{
+  int num_reports = getNumReports();
+  reporters_out.clear();
+  reporters_out.reserve(num_reports);
+  reports_out.clear();
+  reports_out.reserve(num_reports);
+
+  if (num_reports > 0) {
+    m_reports.template sync<host_mirror_space>();
+    m_reporters.template sync<host_mirror_space>();
+
+    for (int i = 0; i < num_reports; ++i) {
+      reporters_out.push_back(m_reporters.h_view(i));
+      reports_out.push_back(m_reports.h_view(i));
+    }
+  }
+}
+
+template <typename ReportType, typename DeviceType>
+void ErrorReporter<ReportType, DeviceType>::getReports(
+    typename Kokkos::View<int*, typename DeviceType::execution_space >::HostMirror &reporters_out,
+    typename Kokkos::View<report_type*, typename DeviceType::execution_space >::HostMirror &reports_out)
+{
+  int num_reports = getNumReports();
+  reporters_out = typename Kokkos::View<int*, typename DeviceType::execution_space >::HostMirror("ErrorReport::reporters_out",num_reports);
+  reports_out = typename Kokkos::View<report_type*, typename DeviceType::execution_space >::HostMirror("ErrorReport::reports_out",num_reports);
+
+  if (num_reports > 0) {
+    m_reports.template sync<host_mirror_space>();
+    m_reporters.template sync<host_mirror_space>();
+
+    for (int i = 0; i < num_reports; ++i) {
+      reporters_out(i) = m_reporters.h_view(i);
+      reports_out(i) = m_reports.h_view(i);
+    }
+  }
+}
+
+template <typename ReportType, typename DeviceType>
+void ErrorReporter<ReportType, DeviceType>::clear()
+{
+  int num_reports=0;
+  Kokkos::deep_copy(m_numReportsAttempted, num_reports);
+  m_reports.template modify<execution_space>();
+  m_reporters.template modify<execution_space>();
+}
+
+template <typename ReportType, typename DeviceType>
+void ErrorReporter<ReportType, DeviceType>::resize(const size_t new_size)
+{
+  m_reports.resize(new_size);
+  m_reporters.resize(new_size);
+  Kokkos::fence();
+}
+
+
+} // namespace Experimental
+} // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/containers/src/Kokkos_SegmentedView.hpp b/lib/kokkos/containers/src/Kokkos_SegmentedView.hpp
deleted file mode 100644
index 5dd7a98b893f0418fb31c7ae6026ac30c886f84b..0000000000000000000000000000000000000000
--- a/lib/kokkos/containers/src/Kokkos_SegmentedView.hpp
+++ /dev/null
@@ -1,531 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_SEGMENTED_VIEW_HPP_
-#define KOKKOS_SEGMENTED_VIEW_HPP_
-
-#include <Kokkos_Core.hpp>
-#include <impl/Kokkos_Error.hpp>
-#include <cstdio>
-
-#if ! KOKKOS_USING_EXP_VIEW
-
-namespace Kokkos {
-namespace Experimental {
-
-namespace Impl {
-
-template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
-struct delete_segmented_view;
-
-template<class MemorySpace>
-inline
-void DeviceSetAllocatableMemorySize(size_t) {}
-
-#if defined( KOKKOS_HAVE_CUDA )
-
-template<>
-inline
-void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) {
-#ifdef __CUDACC__
-  size_t size_limit;
-  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
-  if(size_limit<size)
-    cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
-  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
-#endif
-}
-
-template<>
-inline
-void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) {
-#ifdef __CUDACC__
-  size_t size_limit;
-  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
-  if(size_limit<size)
-    cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
-  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
-#endif
-}
-
-#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
-
-}
-
-template< class DataType ,
-          class Arg1Type = void ,
-          class Arg2Type = void ,
-          class Arg3Type = void>
-class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
-{
-public:
-  //! \name Typedefs for device types and various Kokkos::View specializations.
-  //@{
-  typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
-
-  //! The type of a Kokkos::View on the device.
-  typedef Kokkos::View< typename traits::data_type ,
-                typename traits::array_layout ,
-                typename traits::memory_space ,
-                Kokkos::MemoryUnmanaged > t_dev ;
-
-
-private:
-  Kokkos::View<t_dev*,typename traits::memory_space> segments_;
-
-  Kokkos::View<int,typename traits::memory_space> realloc_lock;
-  Kokkos::View<int,typename traits::memory_space> nsegments_;
-
-  size_t segment_length_;
-  size_t segment_length_m1_;
-  int max_segments_;
-
-  int segment_length_log2;
-
-  // Dimensions, cardinality, capacity, and offset computation for
-  // multidimensional array view of contiguous memory.
-  // Inherits from Impl::Shape
-  typedef Kokkos::Impl::ViewOffset< typename traits::shape_type
-                          , typename traits::array_layout
-                          > offset_map_type ;
-
-  offset_map_type               m_offset_map ;
-
-  typedef Kokkos::View< typename traits::array_intrinsic_type ,
-                typename traits::array_layout ,
-                typename traits::memory_space ,
-                typename traits::memory_traits > array_type ;
-
-  typedef Kokkos::View< typename traits::const_data_type ,
-                typename traits::array_layout ,
-                typename traits::memory_space ,
-                typename traits::memory_traits > const_type ;
-
-  typedef Kokkos::View< typename traits::non_const_data_type ,
-                typename traits::array_layout ,
-                typename traits::memory_space ,
-                typename traits::memory_traits > non_const_type ;
-
-  typedef Kokkos::View< typename traits::non_const_data_type ,
-                typename traits::array_layout ,
-                HostSpace ,
-                void > HostMirror ;
-
-  template< bool Accessible >
-  KOKKOS_INLINE_FUNCTION
-  typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
-  dimension_0_intern() const { return nsegments_() * segment_length_ ; }
-
-  template< bool Accessible >
-  KOKKOS_INLINE_FUNCTION
-  typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
-  dimension_0_intern() const
-  {
-    // In Host space
-    int n = 0 ;
-#if ! defined( __CUDA_ARCH__ )
-    Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) );
-#endif
-
-    return n * segment_length_ ;
-  }
-
-public:
-
-  enum { Rank = traits::rank };
-
-  KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; }
-
-  /* \brief return (current) size of dimension 0 */
-  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const {
-    enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
-      Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
-    int n = SegmentedView::dimension_0_intern< Accessible >();
-    return n ;
-  }
-
-  /* \brief return size of dimension 1 */
-  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; }
-  /* \brief return size of dimension 2 */
-  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; }
-  /* \brief return size of dimension 3 */
-  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; }
-  /* \brief return size of dimension 4 */
-  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; }
-  /* \brief return size of dimension 5 */
-  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; }
-  /* \brief return size of dimension 6 */
-  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; }
-  /* \brief return size of dimension 7 */
-  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; }
-
-  /* \brief return size of dimension 2 */
-  KOKKOS_INLINE_FUNCTION typename traits::size_type size() const {
-    return dimension_0() *
-        m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
-        m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ;
-  }
-
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  typename traits::size_type dimension( const iType & i ) const {
-    if(i==0)
-      return dimension_0();
-    else
-      return Kokkos::Impl::dimension( m_offset_map , i );
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  typename traits::size_type capacity() {
-    return segments_.dimension_0() *
-        m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
-        m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7;
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  typename traits::size_type get_num_segments() {
-    enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
-      Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
-    int n = SegmentedView::dimension_0_intern< Accessible >();
-    return n/segment_length_ ;
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  typename traits::size_type get_max_segments() {
-    return max_segments_;
-  }
-
-  /// \brief Constructor that allocates View objects with an initial length of 0.
-  ///
-  /// This constructor works mostly like the analogous constructor of View.
-  /// The first argument is a string label, which is entirely for your
-  /// benefit.  (Different SegmentedView objects may have the same label if
-  /// you like.)  The second argument 'view_length' is the size of the segments.
-  /// This number must be a power of two. The third argument n0 is the maximum
-  /// value for the first dimension of the segmented view. The maximal allocatable
-  /// number of Segments is thus: (n0+view_length-1)/view_length.
-  /// The arguments that follow are the other dimensions of the (1-7) of the
-  /// View objects.  For example, for a View with 3 runtime dimensions,
-  /// the first 4 integer arguments will be nonzero:
-  /// SegmentedView("Name",32768,10000000,8,4). This allocates a SegmentedView
-  /// with a maximum of 306 segments of dimension (32768,8,4). The logical size of
-  /// the segmented view is (n,8,4) with n between 0 and 10000000.
-  /// You may omit the integer arguments that follow.
-  template< class LabelType >
-  SegmentedView(const LabelType & label ,
-      const size_t view_length ,
-      const size_t n0 ,
-      const size_t n1 = 0 ,
-      const size_t n2 = 0 ,
-      const size_t n3 = 0 ,
-      const size_t n4 = 0 ,
-      const size_t n5 = 0 ,
-      const size_t n6 = 0 ,
-      const size_t n7 = 0
-      ): segment_length_(view_length),segment_length_m1_(view_length-1)
-  {
-    segment_length_log2 = -1;
-    size_t l = segment_length_;
-    while(l>0) {
-      l>>=1;
-      segment_length_log2++;
-    }
-    l = 1<<segment_length_log2;
-    if(l!=segment_length_)
-      Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");
-
-    max_segments_ = (n0+segment_length_m1_)/segment_length_;
-
-    Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));
-
-    segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_);
-    realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock");
-    nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews");
-    m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
-
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  SegmentedView(const SegmentedView& src):
-    segments_(src.segments_),
-    realloc_lock (src.realloc_lock),
-    nsegments_ (src.nsegments_),
-    segment_length_(src.segment_length_),
-    segment_length_m1_(src.segment_length_m1_),
-    max_segments_ (src.max_segments_),
-    segment_length_log2(src.segment_length_log2),
-    m_offset_map (src.m_offset_map)
-  {}
-
-  KOKKOS_INLINE_FUNCTION
-  SegmentedView& operator= (const SegmentedView& src) {
-    segments_ = src.segments_;
-    realloc_lock = src.realloc_lock;
-    nsegments_ = src.nsegments_;
-    segment_length_= src.segment_length_;
-    segment_length_m1_= src.segment_length_m1_;
-    max_segments_ = src.max_segments_;
-    segment_length_log2= src.segment_length_log2;
-    m_offset_map = src.m_offset_map;
-    return *this;
-  }
-
-  ~SegmentedView() {
-    if ( !segments_.tracker().ref_counting()) { return; }
-    size_t ref_count = segments_.tracker().ref_count();
-    if(ref_count == 1u) {
-      Kokkos::fence();
-      typename Kokkos::View<int,typename traits::execution_space>::HostMirror h_nviews("h_nviews");
-      Kokkos::deep_copy(h_nviews,nsegments_);
-      Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this));
-    }
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  t_dev get_segment(const int& i) const {
-    return segments_[i];
-  }
-
-  template< class MemberType>
-  KOKKOS_INLINE_FUNCTION
-  void grow (MemberType& team_member, const size_t& growSize) const {
-    if (growSize>max_segments_*segment_length_) {
-      printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
-      return;
-    }
-
-    if(team_member.team_rank()==0) {
-      bool too_small = growSize > segment_length_ * nsegments_();
-      if (too_small) {
-        while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
-          ; // get the lock
-        too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock
-        if(too_small) {
-          while(too_small) {
-            const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
-                m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
-            typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size];
-
-            segments_(nsegments_()) =
-                t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
-            nsegments_()++;
-            too_small = growSize > segment_length_ * nsegments_();
-          }
-        }
-        realloc_lock() = 0; //release the lock
-      }
-    }
-    team_member.team_barrier();
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void grow_non_thread_safe (const size_t& growSize) const {
-    if (growSize>max_segments_*segment_length_) {
-      printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
-      return;
-    }
-    bool too_small = growSize > segment_length_ * nsegments_();
-    if(too_small) {
-      while(too_small) {
-        const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
-                            m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
-        typename traits::non_const_value_type* const ptr =
-          new typename traits::non_const_value_type[alloc_size];
-
-        segments_(nsegments_()) =
-          t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
-                 m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
-                 m_offset_map.N6, m_offset_map.N7);
-        nsegments_()++;
-        too_small = growSize > segment_length_ * nsegments_();
-      }
-    }
-  }
-
-  template< typename iType0 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
-                         , typename traits::value_type &
-                         >::type
-    operator() ( const iType0 & i0 ) const
-    {
-      return segments_[i0>>segment_length_log2](i0&(segment_length_m1_));
-    }
-
-  template< typename iType0 , typename iType1 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<( std::is_integral<iType0>::value &&
-                            std::is_integral<iType1>::value &&
-                            traits::rank == 2 )
-                         , typename traits::value_type &
-                         >::type
-    operator() ( const iType0 & i0 , const iType1 & i1 ) const
-    {
-      return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1);
-    }
-
-  template< typename iType0 , typename iType1 , typename iType2 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<( std::is_integral<iType0>::value &&
-                            std::is_integral<iType1>::value &&
-                            std::is_integral<iType2>::value &&
-                            traits::rank == 3 )
-                         , typename traits::value_type &
-                         >::type
-    operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
-    {
-      return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2);
-    }
-
-  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<( std::is_integral<iType0>::value &&
-                            std::is_integral<iType1>::value &&
-                            std::is_integral<iType2>::value &&
-                            std::is_integral<iType3>::value &&
-                            traits::rank == 4 )
-                         , typename traits::value_type &
-                         >::type
-    operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
-    {
-      return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3);
-    }
-
-  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
-            typename iType4 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<( std::is_integral<iType0>::value &&
-                            std::is_integral<iType1>::value &&
-                            std::is_integral<iType2>::value &&
-                            std::is_integral<iType3>::value &&
-                            std::is_integral<iType4>::value &&
-                            traits::rank == 5 )
-                         , typename traits::value_type &
-                         >::type
-    operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
-                 const iType4 & i4 ) const
-    {
-      return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4);
-    }
-
-  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
-            typename iType4 , typename iType5 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<( std::is_integral<iType0>::value &&
-                            std::is_integral<iType1>::value &&
-                            std::is_integral<iType2>::value &&
-                            std::is_integral<iType3>::value &&
-                            std::is_integral<iType4>::value &&
-                            std::is_integral<iType5>::value &&
-                            traits::rank == 6 )
-                         , typename traits::value_type &
-                         >::type
-    operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
-                 const iType4 & i4 , const iType5 & i5 ) const
-    {
-      return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5);
-    }
-
-  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
-            typename iType4 , typename iType5 , typename iType6 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<( std::is_integral<iType0>::value &&
-                            std::is_integral<iType1>::value &&
-                            std::is_integral<iType2>::value &&
-                            std::is_integral<iType3>::value &&
-                            std::is_integral<iType4>::value &&
-                            std::is_integral<iType5>::value &&
-                            std::is_integral<iType6>::value &&
-                            traits::rank == 7 )
-                         , typename traits::value_type &
-                         >::type
-    operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
-                 const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
-    {
-      return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6);
-    }
-
-  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
-            typename iType4 , typename iType5 , typename iType6 , typename iType7 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<( std::is_integral<iType0>::value &&
-                            std::is_integral<iType1>::value &&
-                            std::is_integral<iType2>::value &&
-                            std::is_integral<iType3>::value &&
-                            std::is_integral<iType4>::value &&
-                            std::is_integral<iType5>::value &&
-                            std::is_integral<iType6>::value &&
-                            std::is_integral<iType7>::value &&
-                            traits::rank == 8 )
-                         , typename traits::value_type &
-                         >::type
-    operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
-                 const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
-    {
-      return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7);
-    }
-};
-
-namespace Impl {
-template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
-struct delete_segmented_view {
-  typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type;
-  typedef typename view_type::execution_space execution_space;
-
-  view_type view_;
-  delete_segmented_view(view_type view):view_(view) {
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (int i) const {
-    delete [] view_.get_segment(i).ptr_on_device();
-  }
-};
-
-}
-}
-}
-
-#endif
-
-#endif
diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
index 7a916c6ef7c449a041d6d2014033e34c3342f185..8646d277921aff5c71b70c48d768ee39944b3455 100644
--- a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
+++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
@@ -241,9 +241,9 @@ public:
   typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type>             modifiable_map_type;
   typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type>       const_map_type;
 
-  static const bool is_set = Impl::is_same<void,value_type>::value;
-  static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value;
-  static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value;
+  static const bool is_set = std::is_same<void,value_type>::value;
+  static const bool has_const_key = std::is_same<const_key_type,declared_key_type>::value;
+  static const bool has_const_value = is_set || std::is_same<const_value_type,declared_value_type>::value;
 
   static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value);
   static const bool is_modifiable_map = has_const_key && !has_const_value;
@@ -735,8 +735,8 @@ public:
   }
 
   template <typename SKey, typename SValue, typename SDevice>
-  typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
-                            Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
+  typename Impl::enable_if< std::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
+                            std::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
                           >::type
   create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src)
   {
diff --git a/lib/kokkos/containers/unit_tests/CMakeLists.txt b/lib/kokkos/containers/unit_tests/CMakeLists.txt
index 7fff0f835bb2e704914fe5df16556d6c4199a916..b9d860f32fd854a59e0258adabdc540a1ef0c512 100644
--- a/lib/kokkos/containers/unit_tests/CMakeLists.txt
+++ b/lib/kokkos/containers/unit_tests/CMakeLists.txt
@@ -1,6 +1,6 @@
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
 
 SET(SOURCES
diff --git a/lib/kokkos/containers/unit_tests/Makefile b/lib/kokkos/containers/unit_tests/Makefile
index 48e3ff61d04b9de210a7f1976217f4d1aca9e8e8..c45e2be05ed73633331b775c1e71195e8d844acc 100644
--- a/lib/kokkos/containers/unit_tests/Makefile
+++ b/lib/kokkos/containers/unit_tests/Makefile
@@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests
 default: build_all
 	echo "End Build"
 
-
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+  CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
 else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?= -lpthread
+  CXX = g++
 endif
 
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?= -lpthread
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests
 
 TEST_TARGETS = 
diff --git a/lib/kokkos/containers/unit_tests/TestCuda.cpp b/lib/kokkos/containers/unit_tests/TestCuda.cpp
index e30160b24e3a57d927924067d171ee8b49540357..6be38cd7a762c2c376f0fdc36e1dfb8b0b54b251 100644
--- a/lib/kokkos/containers/unit_tests/TestCuda.cpp
+++ b/lib/kokkos/containers/unit_tests/TestCuda.cpp
@@ -59,11 +59,13 @@
 #include <TestVector.hpp>
 #include <TestDualView.hpp>
 #include <TestDynamicView.hpp>
-#include <TestSegmentedView.hpp>
 
 #include <Kokkos_DynRankView.hpp>
 #include <TestDynViewAPI.hpp>
 
+#include <Kokkos_ErrorReporter.hpp>
+#include <TestErrorReporter.hpp>
+
 //----------------------------------------------------------------------------
 
 
@@ -133,11 +135,6 @@ void cuda_test_dualview_combinations(unsigned int size)
   test_dualview_combinations<int,Kokkos::Cuda>(size);
 }
 
-void cuda_test_segmented_view(unsigned int size)
-{
-  test_segmented_view<double,Kokkos::Cuda>(size);
-}
-
 void cuda_test_bitset()
 {
   test_bitset<Kokkos::Cuda>();
@@ -184,11 +181,6 @@ void cuda_test_bitset()
       cuda_test_dualview_combinations(size);                     \
   }
 
-#define CUDA_SEGMENTEDVIEW_TEST( size )                             \
-  TEST_F( cuda, segmentedview_##size##x) {       \
-      cuda_test_segmented_view(size);                     \
-  }
-
 CUDA_DUALVIEW_COMBINE_TEST( 10 )
 CUDA_VECTOR_COMBINE_TEST( 10 )
 CUDA_VECTOR_COMBINE_TEST( 3057 )
@@ -198,7 +190,6 @@ CUDA_INSERT_TEST(close,               100000, 90000, 100, 500)
 CUDA_INSERT_TEST(far,                 100000, 90000, 100, 500)
 CUDA_DEEP_COPY( 10000, 1 )
 CUDA_FAILED_INSERT_TEST( 10000, 1000 )
-CUDA_SEGMENTEDVIEW_TEST( 200 )
 
 
 #undef CUDA_INSERT_TEST
@@ -207,7 +198,6 @@ CUDA_SEGMENTEDVIEW_TEST( 200 )
 #undef CUDA_DEEP_COPY
 #undef CUDA_VECTOR_COMBINE_TEST
 #undef CUDA_DUALVIEW_COMBINE_TEST
-#undef CUDA_SEGMENTEDVIEW_TEST
 
 
 TEST_F( cuda , dynamic_view )
@@ -221,6 +211,18 @@ TEST_F( cuda , dynamic_view )
 }
 
 
+#if defined(KOKKOS_CLASS_LAMBDA)  // this variant is compiled only when KOKKOS_CLASS_LAMBDA is defined
+TEST_F(cuda, ErrorReporterViaLambda)
+{
+  TestErrorReporter<ErrorReporterDriverUseLambda<Kokkos::Cuda>>();  // ErrorReporter test using ErrorReporterDriverUseLambda on Kokkos::Cuda
+}
+#endif  // defined(KOKKOS_CLASS_LAMBDA)
+
+TEST_F(cuda, ErrorReporter)
+{
+  TestErrorReporter<ErrorReporterDriver<Kokkos::Cuda>>();  // ErrorReporter test using ErrorReporterDriver on Kokkos::Cuda (no lambda guard needed)
+}
+
 }
 
 #endif  /* #ifdef KOKKOS_HAVE_CUDA */
diff --git a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp
index e71ccc0091f0ad8c67de46fe91b4b08e43dcc27d..d06277864486e2a80755629d6741f1b7f935fd37 100644
--- a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp
@@ -715,9 +715,9 @@ public:
   typedef Kokkos::Experimental::DynRankView< T, device, Kokkos::MemoryUnmanaged > dView0_unmanaged ;
   typedef typename dView0::host_mirror_space host_drv_space ;
 
-  typedef Kokkos::Experimental::View< T , device >        View0 ;
-  typedef Kokkos::Experimental::View< T* , device >       View1 ;
-  typedef Kokkos::Experimental::View< T******* , device > View7 ;
+  typedef Kokkos::View< T , device >        View0 ;
+  typedef Kokkos::View< T* , device >       View1 ;
+  typedef Kokkos::View< T******* , device > View7 ;
 
   typedef typename View0::host_mirror_space  host_view_space ;
 
@@ -1127,8 +1127,7 @@ public:
     // T v2 = hx(0,0) ; // Generates compile error as intended
     // hx(0,0) = v2 ;   // Generates compile error as intended
 
-/*
-#if ! KOKKOS_USING_EXP_VIEW
+#if 0 /* Asynchronous deep copies not implemented for dynamic rank view */
     // Testing with asynchronous deep copy with respect to device
     {
       size_t count = 0 ;
@@ -1193,7 +1192,7 @@ public:
         { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); }
       }}}}
     }
-#endif */ // #if ! KOKKOS_USING_EXP_VIEW
+#endif
 
     // Testing with synchronous deep copy
     {
diff --git a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c431b62a5380b82bb5a00da4ac8d63411cbe2f78
--- /dev/null
+++ b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp
@@ -0,0 +1,227 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP
+#define KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP
+
+#include <gtest/gtest.h>
+#include <iostream>
+#include <Kokkos_Core.hpp>
+
+namespace Test {
+
+// Just save the data in the report.  Informative text goes in the operator<<(..).
+template <typename DataType1, typename DataType2, typename DataType3>
+struct ThreeValReport
+{
+  DataType1 m_data1;  // the drivers in this file store the reporting work index here
+  DataType2 m_data2;  // the drivers in this file store -2 * work index here
+  DataType3 m_data3;  // the drivers in this file store M_PI * work index here
+
+};
+
+template <typename DataType1, typename DataType2, typename DataType3>
+std::ostream &operator<<(std::ostream & os, const ThreeValReport<DataType1, DataType2, DataType3> &val)
+{ // Streams the three values separated by single spaces inside braces: {m_data1 m_data2 m_data3}.
+  return os << "{" << val.m_data1 << " " << val.m_data2 << " " << val.m_data3 << "}";
+}
+
+template<typename ReportType>  // Verifies that reporters[i] is odd and equals reports[i].m_data1 for every report.
+void checkReportersAndReportsAgree(const std::vector<int> &reporters,
+                                   const std::vector<ReportType> &reports)
+{
+  ASSERT_EQ(reporters.size(), reports.size()); // entries are paired by index below; never read past the shorter vector
+  for (size_t i = 0; i < reports.size(); ++i) {
+    EXPECT_EQ(1, reporters[i] % 2);              // error_condition() only fires for odd work indices
+    EXPECT_EQ(reporters[i], reports[i].m_data1); // each report carries the index of the work item that filed it
+  }
+}
+
+template <typename DeviceType>
+struct ErrorReporterDriverBase {
+  // Holds the ErrorReporter under test plus the error predicate and checks shared by the drivers.
+  typedef ThreeValReport<int, int, double>                                      report_type;
+  typedef Kokkos::Experimental::ErrorReporter<report_type, DeviceType>  error_reporter_type;
+  error_reporter_type m_errorReporter;
+
+  ErrorReporterDriverBase(int reporter_capacity, int test_size)  // test_size is accepted but not used here
+    : m_errorReporter(reporter_capacity)  {  }
+  // A work item reports an error exactly when its index is odd.
+  KOKKOS_INLINE_FUNCTION bool error_condition(const int work_idx) const { return (work_idx % 2 != 0); }
+
+  void check_expectations(int reporter_capacity, int test_size)
+  {
+    int num_reported = m_errorReporter.getNumReports();
+    int num_attempts = m_errorReporter.getNumReportAttempts();
+    // The drivers iterate over [0, test_size), which holds test_size / 2 odd indices (= report attempts).
+    int expected_num_reports = std::min(reporter_capacity, test_size / 2);
+    EXPECT_EQ(expected_num_reports, num_reported);
+    EXPECT_EQ(test_size / 2, num_attempts);
+
+    bool expect_full = (reporter_capacity <= (test_size / 2));
+    bool reported_full = m_errorReporter.full();
+    EXPECT_EQ(expect_full, reported_full);
+  }
+};
+
+template <typename ErrorReporterDriverType>
+void TestErrorReporter()
+{ // Builds two drivers and validates the reports retrieved from each driver's m_errorReporter.
+  typedef ErrorReporterDriverType tester_type;
+  std::vector<int> reporters;
+  std::vector<typename tester_type::report_type> reports;
+
+  tester_type test1(100, 10);  // the drivers in this file take (reporter_capacity, test_size)
+  test1.m_errorReporter.getReports(reporters, reports);
+  checkReportersAndReportsAgree(reporters, reports);
+
+  tester_type test2(10, 100);
+  test2.m_errorReporter.getReports(reporters, reports);
+  checkReportersAndReportsAgree(reporters, reports);
+  // Fetch test2's reports again, this time through the View-based getReports() call.
+  typename Kokkos::View<int*, typename ErrorReporterDriverType::execution_space >::HostMirror view_reporters;
+  typename Kokkos::View<typename tester_type::report_type*, typename ErrorReporterDriverType::execution_space >::HostMirror
+     view_reports;
+  test2.m_errorReporter.getReports(view_reporters, view_reports);
+  // Copy the view contents into the std::vectors so the same checker can be reused.
+  int num_reports = view_reporters.extent(0);
+  reporters.clear();
+  reports.clear();
+  reporters.reserve(num_reports);
+  reports.reserve(num_reports);
+
+  for (int i = 0; i < num_reports; ++i) {
+    reporters.push_back(view_reporters(i));
+    reports.push_back(view_reports(i));
+  }
+  checkReportersAndReportsAgree(reporters, reports);
+
+}
+
+
+template <typename DeviceType>
+struct ErrorReporterDriver : public ErrorReporterDriverBase<DeviceType>
+{ // Driver that passes itself (*this) to Kokkos::parallel_for as the functor.
+  typedef ErrorReporterDriverBase<DeviceType>                             driver_base;
+  typedef typename driver_base::error_reporter_type::execution_space  execution_space;
+
+  ErrorReporterDriver(int reporter_capacity, int test_size)
+    : driver_base(reporter_capacity, test_size)
+  {
+    execute(reporter_capacity, test_size);
+
+    // Test that clear() and resize() work across memory spaces.
+    if (reporter_capacity < test_size) {
+      driver_base::m_errorReporter.clear();
+      driver_base::m_errorReporter.resize(test_size);
+      execute(test_size, test_size);  // the reporter was just resized to test_size, so that is the capacity to expect
+    }
+  }
+  // Runs operator() over [0, test_size) and then checks the reporter's counters.
+  void execute(int reporter_capacity, int test_size)
+  {
+    Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,test_size), *this);
+    driver_base::check_expectations(reporter_capacity, test_size);
+  }
+  // Kernel body: a work item satisfying error_condition() files one report keyed by its index.
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int work_idx) const
+  {
+    if (driver_base::error_condition(work_idx)) {
+      double val = M_PI * static_cast<double>(work_idx);
+      typename driver_base::report_type report = {work_idx, -2*work_idx, val};
+      driver_base::m_errorReporter.add_report(work_idx, report);
+    }
+  }
+};
+
+#if defined(KOKKOS_CLASS_LAMBDA)
+template <typename DeviceType>
+struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase<DeviceType>
+{
+  // Driver whose kernel body is a KOKKOS_CLASS_LAMBDA; only compiled when that macro is defined.
+  typedef ErrorReporterDriverBase<DeviceType>                             driver_base;
+  typedef typename driver_base::error_reporter_type::execution_space  execution_space;
+
+  ErrorReporterDriverUseLambda(int reporter_capacity, int test_size)
+    : driver_base(reporter_capacity, test_size)
+  {
+    Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,test_size), KOKKOS_CLASS_LAMBDA (const int work_idx) {
+      if (driver_base::error_condition(work_idx)) {
+        double val = M_PI * static_cast<double>(work_idx);
+        typename driver_base::report_type report = {work_idx, -2*work_idx, val};
+        driver_base::m_errorReporter.add_report(work_idx, report);
+      }
+    });
+    driver_base::check_expectations(reporter_capacity, test_size);  // counters are checked once, right after the kernel
+  }
+
+};
+#endif
+
+
+#ifdef KOKKOS_HAVE_OPENMP
+struct ErrorReporterDriverNativeOpenMP : public ErrorReporterDriverBase<Kokkos::OpenMP>
+{ // Driver that files reports from a raw "#pragma omp parallel for" loop instead of Kokkos::parallel_for.
+  typedef ErrorReporterDriverBase<Kokkos::OpenMP>  driver_base;
+  typedef driver_base::error_reporter_type::execution_space  execution_space;
+
+  ErrorReporterDriverNativeOpenMP(int reporter_capacity, int test_size)
+    : driver_base(reporter_capacity, test_size)
+  {
+#pragma omp parallel for
+    for(int work_idx = 0; work_idx < test_size; ++work_idx)
+    {
+      if (driver_base::error_condition(work_idx)) {
+        double val = M_PI * static_cast<double>(work_idx);
+        driver_base::report_type report = {work_idx, -2*work_idx, val};  // non-dependent name: no typename needed
+        driver_base::m_errorReporter.add_report(work_idx, report);
+      }
+    }
+    driver_base::check_expectations(reporter_capacity, test_size);
+  }
+};
+#endif
+
+} // namespace Test
+#endif // #ifndef KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP
diff --git a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp
index a4319f39ff7ce626f45a3b7cd3fe9b2a823d1132..598a296c78a563043f0abd04c100db064151241f 100644
--- a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp
+++ b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp
@@ -56,12 +56,14 @@
 #include <TestVector.hpp>
 #include <TestDualView.hpp>
 #include <TestDynamicView.hpp>
-#include <TestSegmentedView.hpp>
 #include <TestComplex.hpp>
 
 #include <Kokkos_DynRankView.hpp>
 #include <TestDynViewAPI.hpp>
 
+#include <Kokkos_ErrorReporter.hpp>
+#include <TestErrorReporter.hpp>
+
 #include <iomanip>
 
 namespace Test {
@@ -143,11 +145,6 @@ TEST_F( openmp , staticcrsgraph )
       test_dualview_combinations<int,Kokkos::OpenMP>(size);                     \
   }
 
-#define OPENMP_SEGMENTEDVIEW_TEST( size )                             \
-  TEST_F( openmp, segmentedview_##size##x) {       \
-      test_segmented_view<double,Kokkos::OpenMP>(size);                     \
-  }
-
 OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true)
 OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false)
 OPENMP_FAILED_INSERT_TEST( 10000, 1000 )
@@ -156,7 +153,6 @@ OPENMP_DEEP_COPY( 10000, 1 )
 OPENMP_VECTOR_COMBINE_TEST( 10 )
 OPENMP_VECTOR_COMBINE_TEST( 3057 )
 OPENMP_DUALVIEW_COMBINE_TEST( 10 )
-OPENMP_SEGMENTEDVIEW_TEST( 10000 )
 
 #undef OPENMP_INSERT_TEST
 #undef OPENMP_FAILED_INSERT_TEST
@@ -164,7 +160,6 @@ OPENMP_SEGMENTEDVIEW_TEST( 10000 )
 #undef OPENMP_DEEP_COPY
 #undef OPENMP_VECTOR_COMBINE_TEST
 #undef OPENMP_DUALVIEW_COMBINE_TEST
-#undef OPENMP_SEGMENTEDVIEW_TEST
 #endif
 
 
@@ -178,5 +173,22 @@ TEST_F( openmp , dynamic_view )
   }
 }
 
+#if defined(KOKKOS_CLASS_LAMBDA)
+TEST_F(openmp, ErrorReporterViaLambda)  // lambda-based driver (ErrorReporterDriverUseLambda) on Kokkos::OpenMP
+{
+  TestErrorReporter<ErrorReporterDriverUseLambda<Kokkos::OpenMP>>();
+}
+#endif
+
+TEST_F(openmp, ErrorReporter)  // runs TestErrorReporter with ErrorReporterDriver instantiated for Kokkos::OpenMP
+{
+  TestErrorReporter<ErrorReporterDriver<Kokkos::OpenMP>>();
+}
+
+TEST_F(openmp, ErrorReporterNativeOpenMP)  // runs TestErrorReporter with ErrorReporterDriverNativeOpenMP
+{
+  TestErrorReporter<ErrorReporterDriverNativeOpenMP>();
+}
+
 } // namespace test
 
diff --git a/lib/kokkos/containers/unit_tests/TestSegmentedView.hpp b/lib/kokkos/containers/unit_tests/TestSegmentedView.hpp
deleted file mode 100644
index bfd66d12a7dc658fe729ce7016b95d5d05c60202..0000000000000000000000000000000000000000
--- a/lib/kokkos/containers/unit_tests/TestSegmentedView.hpp
+++ /dev/null
@@ -1,708 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP
-#define KOKKOS_TEST_SEGMENTEDVIEW_HPP
-
-#include <gtest/gtest.h>
-#include <iostream>
-#include <cstdlib>
-#include <cstdio>
-#include <Kokkos_Core.hpp>
-
-#if ! KOKKOS_USING_EXP_VIEW
-
-#include <Kokkos_SegmentedView.hpp>
-#include <impl/Kokkos_Timer.hpp>
-
-namespace Test {
-
-namespace Impl {
-
-  template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
-  struct GrowTest;
-
-  template<class ViewType , class ExecutionSpace>
-  struct GrowTest<ViewType , ExecutionSpace , 1> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    GrowTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      a.grow(team_member , team_idx+team_member.team_size());
-      value += team_idx + team_member.team_rank();
-
-      if((a.dimension_0()>team_idx+team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+team_member.team_rank()))
-        a(team_idx+team_member.team_rank()) = team_idx+team_member.team_rank();
-
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct GrowTest<ViewType , ExecutionSpace , 2> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    GrowTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      a.grow(team_member , team_idx+ team_member.team_size());
-
-      for( typename ExecutionSpace::size_type k=0;k<7;k++)
-        value += team_idx + team_member.team_rank() + 13*k;
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) {
-          a(team_idx+ team_member.team_rank(),k) =
-              team_idx+ team_member.team_rank() + 13*k;
-        }
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct GrowTest<ViewType , ExecutionSpace , 3> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    GrowTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      a.grow(team_member , team_idx+ team_member.team_size());
-
-      for( typename ExecutionSpace::size_type k=0;k<7;k++)
-        for( typename ExecutionSpace::size_type l=0;l<3;l++)
-          value += team_idx + team_member.team_rank() + 13*k + 3*l;
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            a(team_idx+ team_member.team_rank(),k,l) =
-                team_idx+ team_member.team_rank() + 13*k + 3*l;
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct GrowTest<ViewType , ExecutionSpace , 4> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    GrowTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      a.grow(team_member , team_idx+ team_member.team_size());
-
-      for( typename ExecutionSpace::size_type k=0;k<7;k++)
-        for( typename ExecutionSpace::size_type l=0;l<3;l++)
-          for( typename ExecutionSpace::size_type m=0;m<2;m++)
-            value += team_idx + team_member.team_rank() + 13*k + 3*l + 7*m;
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              a(team_idx+ team_member.team_rank(),k,l,m) =
-                  team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m;
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct GrowTest<ViewType , ExecutionSpace , 5> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    GrowTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      a.grow(team_member , team_idx+ team_member.team_size());
-
-      for( typename ExecutionSpace::size_type k=0;k<7;k++)
-        for( typename ExecutionSpace::size_type l=0;l<3;l++)
-          for( typename ExecutionSpace::size_type m=0;m<2;m++)
-            for( typename ExecutionSpace::size_type n=0;n<3;n++)
-              value +=
-                  team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
-                a(team_idx+ team_member.team_rank(),k,l,m,n) =
-                  team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct GrowTest<ViewType , ExecutionSpace , 6> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    GrowTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      a.grow(team_member , team_idx+ team_member.team_size());
-
-      for( typename ExecutionSpace::size_type k=0;k<7;k++)
-        for( typename ExecutionSpace::size_type l=0;l<3;l++)
-          for( typename ExecutionSpace::size_type m=0;m<2;m++)
-            for( typename ExecutionSpace::size_type n=0;n<3;n++)
-              for( typename ExecutionSpace::size_type o=0;o<2;o++)
-              value +=
-                  team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
-                for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
-                a(team_idx+ team_member.team_rank(),k,l,m,n,o) =
-                    team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct GrowTest<ViewType , ExecutionSpace , 7> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    GrowTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      a.grow(team_member , team_idx+ team_member.team_size());
-
-      for( typename ExecutionSpace::size_type k=0;k<7;k++)
-        for( typename ExecutionSpace::size_type l=0;l<3;l++)
-          for( typename ExecutionSpace::size_type m=0;m<2;m++)
-            for( typename ExecutionSpace::size_type n=0;n<3;n++)
-              for( typename ExecutionSpace::size_type o=0;o<2;o++)
-                for( typename ExecutionSpace::size_type p=0;p<4;p++)
-              value +=
-                  team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
-                for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
-                  for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
-                a(team_idx+ team_member.team_rank(),k,l,m,n,o,p) =
-                    team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct GrowTest<ViewType , ExecutionSpace , 8> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    GrowTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-      a.grow(team_member , team_idx + team_member.team_size());
-
-      for( typename ExecutionSpace::size_type k=0;k<7;k++)
-        for( typename ExecutionSpace::size_type l=0;l<3;l++)
-          for( typename ExecutionSpace::size_type m=0;m<2;m++)
-            for( typename ExecutionSpace::size_type n=0;n<3;n++)
-              for( typename ExecutionSpace::size_type o=0;o<2;o++)
-                for( typename ExecutionSpace::size_type p=0;p<4;p++)
-                  for( typename ExecutionSpace::size_type q=0;q<3;q++)
-              value +=
-                  team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
-                for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
-                  for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
-                    for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
-                a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q) =
-                    team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
-  struct VerifyTest;
-
-  template<class ViewType , class ExecutionSpace>
-  struct VerifyTest<ViewType , ExecutionSpace , 1> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    VerifyTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        value += a(team_idx+ team_member.team_rank());
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct VerifyTest<ViewType , ExecutionSpace , 2> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    VerifyTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          value += a(team_idx+ team_member.team_rank(),k);
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct VerifyTest<ViewType , ExecutionSpace , 3> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    VerifyTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            value += a(team_idx+ team_member.team_rank(),k,l);
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct VerifyTest<ViewType , ExecutionSpace , 4> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    VerifyTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              value += a(team_idx+ team_member.team_rank(),k,l,m);
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct VerifyTest<ViewType , ExecutionSpace , 5> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    VerifyTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
-                value += a(team_idx+ team_member.team_rank(),k,l,m,n);
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct VerifyTest<ViewType , ExecutionSpace , 6> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    VerifyTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
-                for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
-                  value += a(team_idx+ team_member.team_rank(),k,l,m,n,o);
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct VerifyTest<ViewType , ExecutionSpace , 7> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    VerifyTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
-                for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
-                  for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
-                    value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p);
-      }
-    }
-  };
-
-  template<class ViewType , class ExecutionSpace>
-  struct VerifyTest<ViewType , ExecutionSpace , 8> {
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-    typedef typename Policy::member_type team_type;
-    typedef double value_type;
-
-    ViewType a;
-
-    VerifyTest(ViewType in):a(in) {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator() (team_type team_member, double& value) const {
-      unsigned int team_idx = team_member.league_rank() * team_member.team_size();
-
-      if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
-         (a.dimension(0)>team_idx+ team_member.team_rank())) {
-        for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
-          for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
-            for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
-              for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
-                for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
-                  for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
-                    for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
-                      value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q);
-      }
-    }
-  };
-
-  template <typename Scalar, class ExecutionSpace>
-  struct test_segmented_view
-  {
-    typedef test_segmented_view<Scalar,ExecutionSpace> self_type;
-
-    typedef Scalar scalar_type;
-    typedef ExecutionSpace execution_space;
-    typedef Kokkos::TeamPolicy<execution_space> Policy;
-
-    double result;
-    double reference;
-
-    template <class ViewType>
-    void run_me(ViewType a, int max_length){
-      const int team_size = Policy::team_size_max( GrowTest<ViewType,execution_space>(a) );
-      const int nteams = max_length/team_size;
-
-      reference = 0;
-      result = 0;
-
-      Kokkos::parallel_reduce(Policy(nteams,team_size),GrowTest<ViewType,execution_space>(a),reference);
-      Kokkos::fence();
-      Kokkos::parallel_reduce(Policy(nteams,team_size),VerifyTest<ViewType,execution_space>(a),result);
-      Kokkos::fence();
-    }
-
-
-    test_segmented_view(unsigned int size,int rank)
-    {
-      reference = 0;
-      result = 0;
-
-      const int dim_1 = 7;
-      const int dim_2 = 3;
-      const int dim_3 = 2;
-      const int dim_4 = 3;
-      const int dim_5 = 2;
-      const int dim_6 = 4;
-      //const int dim_7 = 3;
-
-      if(rank==1) {
-        typedef Kokkos::Experimental::SegmentedView<Scalar*,Kokkos::LayoutLeft,ExecutionSpace> rank1_view;
-        run_me< rank1_view >(rank1_view("Rank1",128,size), size);
-      }
-      if(rank==2) {
-        typedef Kokkos::Experimental::SegmentedView<Scalar**,Kokkos::LayoutLeft,ExecutionSpace> rank2_view;
-        run_me< rank2_view >(rank2_view("Rank2",128,size,dim_1), size);
-      }
-      if(rank==3) {
-        typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2],Kokkos::LayoutRight,ExecutionSpace> rank3_view;
-        run_me< rank3_view >(rank3_view("Rank3",128,size), size);
-      }
-      if(rank==4) {
-        typedef Kokkos::Experimental::SegmentedView<Scalar****,Kokkos::LayoutRight,ExecutionSpace> rank4_view;
-        run_me< rank4_view >(rank4_view("Rank4",128,size,dim_1,dim_2,dim_3), size);
-      }
-      if(rank==5) {
-        typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2][3],Kokkos::LayoutLeft,ExecutionSpace> rank5_view;
-        run_me< rank5_view >(rank5_view("Rank5",128,size), size);
-      }
-      if(rank==6) {
-        typedef Kokkos::Experimental::SegmentedView<Scalar*****[2],Kokkos::LayoutRight,ExecutionSpace> rank6_view;
-        run_me< rank6_view >(rank6_view("Rank6",128,size,dim_1,dim_2,dim_3,dim_4), size);
-      }
-      if(rank==7) {
-        typedef Kokkos::Experimental::SegmentedView<Scalar*******,Kokkos::LayoutLeft,ExecutionSpace> rank7_view;
-        run_me< rank7_view >(rank7_view("Rank7",128,size,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6), size);
-      }
-      if(rank==8) {
-        typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> rank8_view;
-        run_me< rank8_view >(rank8_view("Rank8",128,size,dim_1,dim_2,dim_3,dim_4), size);
-      }
-    }
-
-   };
-
-} // namespace Impl
-
-
-
-
-template <typename Scalar, class ExecutionSpace>
-void test_segmented_view(unsigned int size)
-{
-  {
-    typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> view_type;
-    view_type a("A",128,size,7,3,2,3);
-    double reference;
-
-    Impl::GrowTest<view_type,ExecutionSpace> f(a);
-
-    const int team_size = Kokkos::TeamPolicy<ExecutionSpace>::team_size_max( f );
-    const int nteams = (size+team_size-1)/team_size;
-
-    Kokkos::parallel_reduce(Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size),f,reference);
-
-    size_t real_size = ((size+127)/128)*128;
-
-    ASSERT_EQ(real_size,a.dimension_0());
-    ASSERT_EQ(7,a.dimension_1());
-    ASSERT_EQ(3,a.dimension_2());
-    ASSERT_EQ(2,a.dimension_3());
-    ASSERT_EQ(3,a.dimension_4());
-    ASSERT_EQ(2,a.dimension_5());
-    ASSERT_EQ(4,a.dimension_6());
-    ASSERT_EQ(3,a.dimension_7());
-    ASSERT_EQ(real_size,a.dimension(0));
-    ASSERT_EQ(7,a.dimension(1));
-    ASSERT_EQ(3,a.dimension(2));
-    ASSERT_EQ(2,a.dimension(3));
-    ASSERT_EQ(3,a.dimension(4));
-    ASSERT_EQ(2,a.dimension(5));
-    ASSERT_EQ(4,a.dimension(6));
-    ASSERT_EQ(3,a.dimension(7));
-    ASSERT_EQ(8,a.Rank);
-  }
-  {
-    Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,1);
-    ASSERT_EQ(test.reference,test.result);
-  }
-  {
-    Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,2);
-    ASSERT_EQ(test.reference,test.result);
-  }
-  {
-    Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,3);
-    ASSERT_EQ(test.reference,test.result);
-  }
-  {
-    Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,4);
-    ASSERT_EQ(test.reference,test.result);
-  }
-  {
-    Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,5);
-    ASSERT_EQ(test.reference,test.result);
-  }
-  {
-    Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,6);
-    ASSERT_EQ(test.reference,test.result);
-  }
-  {
-    Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,7);
-    ASSERT_EQ(test.reference,test.result);
-  }
-  {
-    Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,8);
-    ASSERT_EQ(test.reference,test.result);
-  }
-
-}
-
-
-} // namespace Test
-
-#else
-
-template <typename Scalar, class ExecutionSpace>
-void test_segmented_view(unsigned int ) {}
-
-#endif
-
-#endif /* #ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP */
-
diff --git a/lib/kokkos/containers/unit_tests/TestSerial.cpp b/lib/kokkos/containers/unit_tests/TestSerial.cpp
index a7c42d27987d2938fb6b10254d72045732e0f74c..2be27ea613c5ecb18bcd72f5dfd9e542b44fec66 100644
--- a/lib/kokkos/containers/unit_tests/TestSerial.cpp
+++ b/lib/kokkos/containers/unit_tests/TestSerial.cpp
@@ -58,7 +58,6 @@
 #include <TestStaticCrsGraph.hpp>
 #include <TestVector.hpp>
 #include <TestDualView.hpp>
-#include <TestSegmentedView.hpp>
 #include <TestDynamicView.hpp>
 #include <TestComplex.hpp>
 
@@ -67,6 +66,9 @@
 #include <Kokkos_DynRankView.hpp>
 #include <TestDynViewAPI.hpp>
 
+#include <Kokkos_ErrorReporter.hpp>
+#include <TestErrorReporter.hpp>
+
 namespace Test {
 
 class serial : public ::testing::Test {
@@ -135,11 +137,6 @@ TEST_F( serial, bitset )
     test_dualview_combinations<int,Kokkos::Serial>(size);               \
   }
 
-#define SERIAL_SEGMENTEDVIEW_TEST( size )                               \
-  TEST_F( serial, segmentedview_##size##x) {                            \
-    test_segmented_view<double,Kokkos::Serial>(size);                   \
-  }
-
 SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true)
 SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false)
 SERIAL_FAILED_INSERT_TEST( 10000, 1000 )
@@ -148,7 +145,6 @@ SERIAL_DEEP_COPY( 10000, 1 )
 SERIAL_VECTOR_COMBINE_TEST( 10 )
 SERIAL_VECTOR_COMBINE_TEST( 3057 )
 SERIAL_DUALVIEW_COMBINE_TEST( 10 )
-SERIAL_SEGMENTEDVIEW_TEST( 10000 )
 
 #undef SERIAL_INSERT_TEST
 #undef SERIAL_FAILED_INSERT_TEST
@@ -156,7 +152,6 @@ SERIAL_SEGMENTEDVIEW_TEST( 10000 )
 #undef SERIAL_DEEP_COPY
 #undef SERIAL_VECTOR_COMBINE_TEST
 #undef SERIAL_DUALVIEW_COMBINE_TEST
-#undef SERIAL_SEGMENTEDVIEW_TEST
 
 TEST_F( serial , dynamic_view )
 {
@@ -168,6 +163,19 @@ TEST_F( serial , dynamic_view )
   }
 }
 
+#if defined(KOKKOS_CLASS_LAMBDA)
+TEST_F(serial, ErrorReporterViaLambda)
+{
+  TestErrorReporter<ErrorReporterDriverUseLambda<Kokkos::Serial>>();
+}
+#endif
+
+TEST_F(serial, ErrorReporter)
+{
+  TestErrorReporter<ErrorReporterDriver<Kokkos::Serial>>();
+}
+
+
 } // namespace Test
 
 #endif // KOKKOS_HAVE_SERIAL
diff --git a/lib/kokkos/containers/unit_tests/TestThreads.cpp b/lib/kokkos/containers/unit_tests/TestThreads.cpp
index 58277528d31d6ea6adae2996f5e8329b2c63b791..3b34006a01b99bff31896447b1838bfa18192f5e 100644
--- a/lib/kokkos/containers/unit_tests/TestThreads.cpp
+++ b/lib/kokkos/containers/unit_tests/TestThreads.cpp
@@ -62,11 +62,13 @@
 #include <TestVector.hpp>
 #include <TestDualView.hpp>
 #include <TestDynamicView.hpp>
-#include <TestSegmentedView.hpp>
 
 #include <Kokkos_DynRankView.hpp>
 #include <TestDynViewAPI.hpp>
 
+#include <Kokkos_ErrorReporter.hpp>
+#include <TestErrorReporter.hpp>
+
 namespace Test {
 
 class threads : public ::testing::Test {
@@ -145,12 +147,6 @@ TEST_F( threads , staticcrsgraph )
       test_dualview_combinations<int,Kokkos::Threads>(size);                     \
   }
 
-#define THREADS_SEGMENTEDVIEW_TEST( size )                             \
-  TEST_F( threads, segmentedview_##size##x) {       \
-      test_segmented_view<double,Kokkos::Threads>(size);                     \
-  }
-
-
 THREADS_INSERT_TEST(far, 100000, 90000, 100, 500, false)
 THREADS_FAILED_INSERT_TEST( 10000, 1000 )
 THREADS_DEEP_COPY( 10000, 1 )
@@ -158,7 +154,6 @@ THREADS_DEEP_COPY( 10000, 1 )
 THREADS_VECTOR_COMBINE_TEST( 10 )
 THREADS_VECTOR_COMBINE_TEST( 3057 )
 THREADS_DUALVIEW_COMBINE_TEST( 10 )
-THREADS_SEGMENTEDVIEW_TEST( 10000 )
 
 
 #undef THREADS_INSERT_TEST
@@ -167,8 +162,6 @@ THREADS_SEGMENTEDVIEW_TEST( 10000 )
 #undef THREADS_DEEP_COPY
 #undef THREADS_VECTOR_COMBINE_TEST
 #undef THREADS_DUALVIEW_COMBINE_TEST
-#undef THREADS_SEGMENTEDVIEW_TEST
-
 
 
 TEST_F( threads , dynamic_view )
@@ -181,6 +174,19 @@ TEST_F( threads , dynamic_view )
   }
 }
 
+
+#if defined(KOKKOS_CLASS_LAMBDA)
+TEST_F(threads, ErrorReporterViaLambda)
+{
+  TestErrorReporter<ErrorReporterDriverUseLambda<Kokkos::Threads>>();
+}
+#endif
+
+TEST_F(threads, ErrorReporter)
+{
+  TestErrorReporter<ErrorReporterDriver<Kokkos::Threads>>();
+}
+
 } // namespace Test
 
 
diff --git a/lib/kokkos/core/cmake/Dependencies.cmake b/lib/kokkos/core/cmake/Dependencies.cmake
index 34ff0be5d3c6d26761b4758fda5d7217d66660e6..ae9a20c50efeadec69ab22e3365cd3ec26a5e451 100644
--- a/lib/kokkos/core/cmake/Dependencies.cmake
+++ b/lib/kokkos/core/cmake/Dependencies.cmake
@@ -2,3 +2,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
   LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREAD DLlib
   TEST_OPTIONAL_TPLS CUSPARSE
   )
+
+TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib)
\ No newline at end of file
diff --git a/lib/kokkos/core/cmake/KokkosCore_config.h.in b/lib/kokkos/core/cmake/KokkosCore_config.h.in
index 27e3ba1c31f56aa35c6487488d96fa71f7b25d99..9359b5a32b71f06230ea8a2e878e0f457f8eee85 100644
--- a/lib/kokkos/core/cmake/KokkosCore_config.h.in
+++ b/lib/kokkos/core/cmake/KokkosCore_config.h.in
@@ -45,6 +45,16 @@
 #define KOKKOS_ENABLE_PROFILING 0
 #endif
 
+#cmakedefine KOKKOS_HAVE_CUDA_RDC
+#ifdef KOKKOS_HAVE_CUDA_RDC
+#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1
+#endif
+
+#cmakedefine KOKKOS_HAVE_CUDA_LAMBDA
+#ifdef KOKKOS_HAVE_CUDA_LAMBDA
+#define KOKKOS_CUDA_USE_LAMBDA 1
+#endif
+
 // Don't forbid users from defining this macro on the command line,
 // but still make sure that CMake logic can control its definition.
 #if ! defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
diff --git a/lib/kokkos/core/perf_test/CMakeLists.txt b/lib/kokkos/core/perf_test/CMakeLists.txt
index d93ca14d96fe159def46c29165e743313f91c9c4..cae52f1409e43a8adf9046855cc77b24f2dadce7 100644
--- a/lib/kokkos/core/perf_test/CMakeLists.txt
+++ b/lib/kokkos/core/perf_test/CMakeLists.txt
@@ -1,6 +1,6 @@
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINRARY_DIR})
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
 
 SET(SOURCES
   PerfTestMain.cpp
@@ -19,7 +19,7 @@ TRIBITS_ADD_EXECUTABLE(
   TESTONLYLIBS kokkos_gtest
   )
 
-TRIBITS_ADD_EXECUTABLE_AND_TEST(
+TRIBITS_ADD_TEST(
   PerfTest
   NAME PerfTestExec
   COMM serial mpi
diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile
index 8fa1fbfc3c00795cf0739a95f1fd23a988b30fa6..85f869971a33c349769bd318af28759f3e3eca12 100644
--- a/lib/kokkos/core/perf_test/Makefile
+++ b/lib/kokkos/core/perf_test/Makefile
@@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/core/perf_test
 default: build_all
 	echo "End Build"
 
-
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+  CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
 else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?=  -lpthread
+  CXX = g++
 endif
 
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?= -lpthread
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test
 
 TEST_TARGETS = 
diff --git a/lib/kokkos/core/perf_test/PerfTestHost.cpp b/lib/kokkos/core/perf_test/PerfTestHost.cpp
index 6a0f2efadacd01e979d3beefd23b617b81acff48..4a05eecfe0eaa93665fe746c96248e09f14dded5 100644
--- a/lib/kokkos/core/perf_test/PerfTestHost.cpp
+++ b/lib/kokkos/core/perf_test/PerfTestHost.cpp
@@ -79,10 +79,21 @@ class host : public ::testing::Test {
 protected:
   static void SetUpTestCase()
   {
-    const unsigned team_count = Kokkos::hwloc::get_available_numa_count();
-    const unsigned threads_per_team = 4 ;
-
-    TestHostDevice::initialize( team_count * threads_per_team );
+    if(Kokkos::hwloc::available()) {
+      const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
+      const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
+      const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
+
+      unsigned threads_count = 0 ;
+
+      threads_count = std::max( 1u , numa_count )
+                    * std::max( 2u , cores_per_numa * threads_per_core );
+                  
+      TestHostDevice::initialize( threads_count );
+    } else {
+      const unsigned thread_count = 4 ;   
+      TestHostDevice::initialize( thread_count );
+    }
   }
 
   static void TearDownTestCase()
diff --git a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp
deleted file mode 100644
index 4ed7d8e2a8a40ef6434637f3e0ae72266e4c76bb..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
-#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
-
-/* only compile this file if CUDA is enabled for Kokkos */
-#if defined( KOKKOS_HAVE_CUDA )
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-template<>
-struct ViewOperatorBoundsErrorAbort< Kokkos::CudaSpace > {
-  KOKKOS_INLINE_FUNCTION
-  static void apply( const size_t rank
-                   , const size_t n0 , const size_t n1
-                   , const size_t n2 , const size_t n3
-                   , const size_t n4 , const size_t n5
-                   , const size_t n6 , const size_t n7
-                   , const size_t i0 , const size_t i1
-                   , const size_t i2 , const size_t i3
-                   , const size_t i4 , const size_t i5
-                   , const size_t i6 , const size_t i7 )
-    {
-      const int r =
-        ( n0 <= i0 ? 0 :
-        ( n1 <= i1 ? 1 :
-        ( n2 <= i2 ? 2 :
-        ( n3 <= i3 ? 3 :
-        ( n4 <= i4 ? 4 :
-        ( n5 <= i5 ? 5 :
-        ( n6 <= i6 ? 6 : 7 )))))));
-      const size_t n =
-        ( n0 <= i0 ? n0 :
-        ( n1 <= i1 ? n1 :
-        ( n2 <= i2 ? n2 :
-        ( n3 <= i3 ? n3 :
-        ( n4 <= i4 ? n4 :
-        ( n5 <= i5 ? n5 :
-        ( n6 <= i6 ? n6 : n7 )))))));
-      const size_t i =
-        ( n0 <= i0 ? i0 :
-        ( n1 <= i1 ? i1 :
-        ( n2 <= i2 ? i2 :
-        ( n3 <= i3 ? i3 :
-        ( n4 <= i4 ? i4 :
-        ( n5 <= i5 ? i5 :
-        ( n6 <= i6 ? i6 : i7 )))))));
-      printf("Cuda view array bounds error index %d : FAILED %lu < %lu\n" , r , i , n );
-      Kokkos::Impl::cuda_abort("Cuda view array bounds error");
-    }
-};
-
-} // namespace Impl
-} // namespace Experimental
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4)
-// Via reinterpret_case this can be used to support all scalar types of those sizes.
-// Any other scalar type falls back to either normal reads out of global memory,
-// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0)
-
-template< typename ValueType , typename AliasType >
-struct CudaTextureFetch {
-
-  ::cudaTextureObject_t   m_obj ;
-  const ValueType       * m_ptr ;
-  int                     m_offset ;
-
-  // Deference operator pulls through texture object and returns by value
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  ValueType operator[]( const iType & i ) const
-    {
-#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
-      AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset );
-      return  *(reinterpret_cast<ValueType*> (&v));
-#else
-      return m_ptr[ i ];
-#endif
-    }
-
-  // Pointer to referenced memory
-  KOKKOS_INLINE_FUNCTION
-  operator const ValueType * () const { return m_ptr ; }
-
-
-  KOKKOS_INLINE_FUNCTION
-  CudaTextureFetch() : m_obj() , m_ptr() , m_offset() {}
-
-  KOKKOS_INLINE_FUNCTION
-  ~CudaTextureFetch() {}
-
-  KOKKOS_INLINE_FUNCTION
-  CudaTextureFetch( const CudaTextureFetch & rhs )
-    : m_obj(     rhs.m_obj )
-    , m_ptr(     rhs.m_ptr )
-    , m_offset(  rhs.m_offset )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  CudaTextureFetch( CudaTextureFetch && rhs )
-    : m_obj(     rhs.m_obj )
-    , m_ptr(     rhs.m_ptr )
-    , m_offset(  rhs.m_offset )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
-    {
-      m_obj     = rhs.m_obj ;
-      m_ptr     = rhs.m_ptr ;
-      m_offset  = rhs.m_offset ;
-      return *this ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  CudaTextureFetch & operator = ( CudaTextureFetch && rhs )
-    {
-      m_obj     = rhs.m_obj ;
-      m_ptr     = rhs.m_ptr ;
-      m_offset  = rhs.m_offset ;
-      return *this ;
-    }
-
-  // Texture object spans the entire allocation.
-  // This handle may view a subset of the allocation, so an offset is required.
-  template< class CudaMemorySpace >
-  inline explicit
-  CudaTextureFetch( const ValueType * const arg_ptr
-                  , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > & record
-                  )
-    : m_obj( record.template attach_texture_object< AliasType >() )
-    , m_ptr( arg_ptr )
-    , m_offset( record.attach_texture_object_offset( reinterpret_cast<const AliasType*>( arg_ptr ) ) )
-    {}
-};
-
-#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
-
-template< typename ValueType , typename AliasType >
-struct CudaLDGFetch {
-
-  const ValueType * m_ptr ;
-
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  ValueType operator[]( const iType & i ) const
-    {
-      AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_ptr[i]));
-      return  *(reinterpret_cast<ValueType*> (&v));
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  operator const ValueType * () const { return m_ptr ; }
-
-  KOKKOS_INLINE_FUNCTION
-  CudaLDGFetch() : m_ptr() {}
-
-  KOKKOS_INLINE_FUNCTION
-  ~CudaLDGFetch() {}
-
-  KOKKOS_INLINE_FUNCTION
-  CudaLDGFetch( const CudaLDGFetch & rhs )
-    : m_ptr( rhs.m_ptr )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  CudaLDGFetch( CudaLDGFetch && rhs )
-    : m_ptr( rhs.m_ptr )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  CudaLDGFetch & operator = ( const CudaLDGFetch & rhs )
-    {
-      m_ptr = rhs.m_ptr ;
-      return *this ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  CudaLDGFetch & operator = ( CudaLDGFetch && rhs )
-    {
-      m_ptr = rhs.m_ptr ;
-      return *this ;
-    }
-
-  template< class CudaMemorySpace >
-  inline explicit
-  CudaTextureFetch( const ValueType * const arg_ptr
-                  , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > const &
-                  )
-    : m_ptr( arg_data_ptr )
-    {}
-};
-
-#endif
-
-} // namespace Impl
-} // namespace Experimental
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-/** \brief  Replace Default ViewDataHandle with Cuda texture fetch specialization
- *          if 'const' value type, CudaSpace and random access.
- */
-template< class Traits >
-class ViewDataHandle< Traits ,
-  typename std::enable_if<(
-    // Is Cuda memory space
-    ( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
-      std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )
-    &&
-    // Is a trivial const value of 4, 8, or 16 bytes
-    std::is_trivial<typename Traits::const_value_type>::value
-    &&
-    std::is_same<typename Traits::const_value_type,typename Traits::value_type>::value
-    &&
-    ( sizeof(typename Traits::const_value_type) ==  4 ||
-      sizeof(typename Traits::const_value_type) ==  8 ||
-      sizeof(typename Traits::const_value_type) == 16 )
-    &&
-    // Random access trait
-    ( Traits::memory_traits::RandomAccess != 0 )
-  )>::type >
-{
-public:
-
-  using track_type  = Kokkos::Experimental::Impl::SharedAllocationTracker ;
-
-  using value_type  = typename Traits::const_value_type ;
-  using return_type = typename Traits::const_value_type ; // NOT a reference
-
-  using alias_type = typename std::conditional< ( sizeof(value_type) ==  4 ) , int ,
-                     typename std::conditional< ( sizeof(value_type) ==  8 ) , ::int2 ,
-                     typename std::conditional< ( sizeof(value_type) == 16 ) , ::int4 , void
-                     >::type
-                     >::type
-                     >::type ;
-
-#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
-  using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
-#else
-  using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
-#endif
-
-  KOKKOS_INLINE_FUNCTION
-  static handle_type const & assign( handle_type const & arg_handle , track_type const & /* arg_tracker */ )
-    {
-      return arg_handle ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker )
-    {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-      // Assignment of texture = non-texture requires creation of a texture object
-      // which can only occur on the host.  In addition, 'get_record' is only valid
-      // if called in a host execution space
-      return handle_type( arg_data_ptr , arg_tracker.template get_record< typename Traits::memory_space >() );
-#else
-      Kokkos::Impl::cuda_abort("Cannot create Cuda texture object from within a Cuda kernel");
-      return handle_type();
-#endif
-    }
-};
-
-}
-}
-}
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
-#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */
-
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
index a4f372d65d1ee6456d9ff6d21cd4775d6fb6c448..8abf2292d9c6dae685d56a51338f712f7ef1d2c8 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
@@ -46,6 +46,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <algorithm>
+#include <atomic>
 #include <Kokkos_Macros.hpp>
 
 /* only compile this file if CUDA is enabled for Kokkos */
@@ -58,6 +59,11 @@
 #include <Cuda/Kokkos_Cuda_Internal.hpp>
 #include <impl/Kokkos_Error.hpp>
 
+#if (KOKKOS_ENABLE_PROFILING)
+#include <impl/Kokkos_Profiling_Interface.hpp>
+#endif
+
+
 /*--------------------------------------------------------------------------*/
 /*--------------------------------------------------------------------------*/
 
@@ -65,6 +71,9 @@ namespace Kokkos {
 namespace Impl {
 
 namespace {
+
+  static std::atomic<int> num_uvm_allocations(0) ;
+
    cudaStream_t get_deep_copy_stream() {
      static cudaStream_t s = 0;
      if( s == 0) {
@@ -119,6 +128,7 @@ void CudaSpace::access_error( const void * const )
   Kokkos::Impl::throw_runtime_exception( msg );
 }
 
+
 /*--------------------------------------------------------------------------*/
 
 bool CudaUVMSpace::available()
@@ -133,6 +143,11 @@ bool CudaUVMSpace::available()
 
 /*--------------------------------------------------------------------------*/
 
+int CudaUVMSpace::number_of_allocations()
+{
+  return Kokkos::Impl::num_uvm_allocations.load();
+}
+
 } // namespace Kokkos
 
 /*--------------------------------------------------------------------------*/
@@ -167,7 +182,18 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const
 {
   void * ptr = NULL;
 
-  CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) );
+  enum { max_uvm_allocations = 65536 };
+
+  if ( arg_alloc_size > 0 )
+  {
+    // Reserve a slot atomically: fetch_add returns the count before the increment,
+    // so the limit test and the increment cannot be split by another thread.
+    if ( Kokkos::Impl::num_uvm_allocations.fetch_add( 1 ) >= max_uvm_allocations ) {
+      Kokkos::Impl::num_uvm_allocations-- ; // refused: release the slot so the counter does not drift
+      Kokkos::Impl::throw_runtime_exception( "CudaUVM error: The maximum limit of UVM allocations exceeded (currently 65536)." ) ;
+    }
+    CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) );
+  }
 
   return ptr ;
 }
@@ -191,7 +217,10 @@ void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_all
 void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
 {
   try {
-    CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
+    if ( arg_alloc_ptr != nullptr ) {
+      Kokkos::Impl::num_uvm_allocations--;
+      CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
+    }
   } catch(...) {}
 }
 
@@ -202,13 +231,24 @@ void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t
   } catch(...) {}
 }
 
+constexpr const char* CudaSpace::name() {
+  return m_name;
+}
+
+constexpr const char* CudaUVMSpace::name() {
+  return m_name;
+}
+
+constexpr const char* CudaHostPinnedSpace::name() {
+  return m_name;
+}
+
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 SharedAllocationRecord< void , void >
@@ -335,6 +375,18 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec )
 SharedAllocationRecord< Kokkos::CudaSpace , void >::
 ~SharedAllocationRecord()
 {
+  #if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    // Stage the allocation header in a local so header.m_label can be passed to the profiling hook.
+    SharedAllocationHeader header ;
+    Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>( & header , RecordBase::m_alloc_ptr , sizeof(SharedAllocationHeader) );
+    // NOTE(review): destination is the local 'header', yet the arguments read <CudaSpace,HostSpace>; confirm the intended space order.
+    Kokkos::Profiling::deallocateData(
+      Kokkos::Profiling::SpaceHandle(Kokkos::CudaSpace::name()),header.m_label,
+      data(),size());
+  }
+  #endif
+
   m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
                     , SharedAllocationRecord< void , void >::m_alloc_size
                     );
@@ -343,6 +395,15 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::
 SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
 ~SharedAllocationRecord()
 {
+  #if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::fence(); //Make sure I can access the label ...
+    Kokkos::Profiling::deallocateData(
+      Kokkos::Profiling::SpaceHandle(Kokkos::CudaUVMSpace::name()),RecordBase::m_alloc_ptr->m_label,
+      data(),size());
+  }
+  #endif
+
   m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
                     , SharedAllocationRecord< void , void >::m_alloc_size
                     );
@@ -351,6 +412,14 @@ SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
 SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
 ~SharedAllocationRecord()
 {
+  #if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::deallocateData(
+      Kokkos::Profiling::SpaceHandle(Kokkos::CudaHostPinnedSpace::name()),RecordBase::m_alloc_ptr->m_label,
+      data(),size());
+  }
+  #endif
+
   m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
                     , SharedAllocationRecord< void , void >::m_alloc_size
                     );
@@ -373,6 +442,12 @@ SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
   , m_tex_obj( 0 )
   , m_space( arg_space )
 {
+  #if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
+  }
+  #endif
+
   SharedAllocationHeader header ;
 
   // Fill in the Header information
@@ -404,7 +479,12 @@ SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
   , m_tex_obj( 0 )
   , m_space( arg_space )
 {
-  // Fill in the Header information, directly accessible via UVM
+  #if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
+  }
+  #endif
+  // Fill in the Header information, directly accessible via UVM
 
   RecordBase::m_alloc_ptr->m_record = this ;
 
@@ -430,6 +510,11 @@ SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
       )
   , m_space( arg_space )
 {
+  #if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
+  }
+  #endif
   // Fill in the Header information, directly accessible via UVM
 
   RecordBase::m_alloc_ptr->m_record = this ;
@@ -502,6 +587,7 @@ void SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
 deallocate_tracked( void * const arg_alloc_ptr )
 {
   if ( arg_alloc_ptr != 0 ) {
+
     SharedAllocationRecord * const r = get_record( arg_alloc_ptr );
 
     RecordBase::decrement( r );
@@ -587,7 +673,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr
   RecordCuda * const record = alloc_ptr ? static_cast< RecordCuda * >( head.m_record ) : (RecordCuda *) 0 ;
 
   if ( ! alloc_ptr || record->m_alloc_ptr != head_cuda ) {
-    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
   }
 
 #else
@@ -598,7 +684,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr
   RecordCuda * const record = static_cast< RecordCuda * >( RecordBase::find( & s_root_record , alloc_ptr ) );
 
   if ( record == 0 ) {
-    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
   }
 
 #endif
@@ -615,7 +701,7 @@ SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record( void * alloc_
   Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : (Header *) 0 ;
 
   if ( ! alloc_ptr || h->m_record->m_alloc_ptr != h ) {
-    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) );
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) );
   }
 
   return static_cast< RecordCuda * >( h->m_record );
@@ -630,7 +716,7 @@ SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record( void *
   Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : (Header *) 0 ;
 
   if ( ! alloc_ptr || h->m_record->m_alloc_ptr != h ) {
-    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) );
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) );
   }
 
   return static_cast< RecordCuda * >( h->m_record );
@@ -728,7 +814,6 @@ print_records( std::ostream & s , const Kokkos::CudaHostPinnedSpace & space , bo
 }
 
 } // namespace Impl
-} // namespace Experimental
 } // namespace Kokkos
 
 /*--------------------------------------------------------------------------*/
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
index 2d8d07d0772f2dd2d27a73a4b804f3000953c824..59e79bba2570342b4175252914bd34701c5782b3 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
@@ -384,10 +384,10 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
   const bool ok_id   = 0 <= cuda_device_id &&
                             cuda_device_id < dev_info.m_cudaDevCount ;
 
-  // Need device capability 2.0 or better
+  // Need device capability 3.0 or better
 
   const bool ok_dev = ok_id &&
-    ( 2 <= dev_info.m_cudaProp[ cuda_device_id ].major &&
+    ( 3 <= dev_info.m_cudaProp[ cuda_device_id ].major &&
       0 <= dev_info.m_cudaProp[ cuda_device_id ].minor );
 
   if ( ok_init && ok_dev ) {
@@ -444,7 +444,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
     //----------------------------------
     // Maximum number of blocks:
 
-    m_maxBlock = m_cudaArch < 300 ? 65535 : cudaProp.maxGridSize[0] ;
+    m_maxBlock = cudaProp.maxGridSize[0] ;
 
     //----------------------------------
 
@@ -495,7 +495,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
       msg << dev_info.m_cudaProp[ cuda_device_id ].major ;
       msg << "." ;
       msg << dev_info.m_cudaProp[ cuda_device_id ].minor ;
-      msg << " has insufficient capability, required 2.0 or better" ;
+      msg << " has insufficient capability, required 3.0 or better" ;
     }
     Kokkos::Impl::throw_runtime_exception( msg.str() );
   }
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
index 7afa06fdf5582cd3543294b4156ac90a906a6ce7..12a639fd44aac274c3b7f29c89e850806f8e5ae4 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
@@ -95,27 +95,42 @@ private:
 
 public:
 
-#if defined( __CUDA_ARCH__ )
-
-  __device__ inline
+  KOKKOS_INLINE_FUNCTION
   const execution_space::scratch_memory_space & team_shmem() const
     { return m_team_shared.set_team_thread_mode(0,1,0) ; }
-  __device__ inline
+  KOKKOS_INLINE_FUNCTION
   const execution_space::scratch_memory_space & team_scratch(const int& level) const
     { return m_team_shared.set_team_thread_mode(level,1,0) ; }
-  __device__ inline
+  KOKKOS_INLINE_FUNCTION
   const execution_space::scratch_memory_space & thread_scratch(const int& level) const
     { return m_team_shared.set_team_thread_mode(level,team_size(),team_rank()) ; }
 
-  __device__ inline int league_rank() const { return m_league_rank ; }
-  __device__ inline int league_size() const { return m_league_size ; }
-  __device__ inline int team_rank() const { return threadIdx.y ; }
-  __device__ inline int team_size() const { return blockDim.y ; }
+  KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
+  KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
+  KOKKOS_INLINE_FUNCTION int team_rank() const { // rank of the calling thread within its team
+    #ifdef __CUDA_ARCH__
+    return threadIdx.y ; // device pass: position along the block's y-dimension
+    #else
+    return 0; // host pass: team_size() reports 1 here, so the only valid rank is 0
+    #endif
+  }
+  KOKKOS_INLINE_FUNCTION int team_size() const { // number of threads in the calling thread's team
+    #ifdef __CUDA_ARCH__
+    return blockDim.y ; // device pass: extent of the block's y-dimension
+    #else
+    return 1; // host pass (__CUDA_ARCH__ undefined): fixed stub value, no thread block to query
+    #endif
+  }
 
-  __device__ inline void team_barrier() const { __syncthreads(); }
+  KOKKOS_INLINE_FUNCTION void team_barrier() const { // barrier across the team; compiles to nothing outside device code
+    #ifdef __CUDA_ARCH__
+    __syncthreads(); // CUDA barrier over all threads of the block
+    #endif
+  }
 
   template<class ValueType>
-  __device__ inline void team_broadcast(ValueType& value, const int& thread_id) const {
+  KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& value, const int& thread_id) const {
+    #ifdef __CUDA_ARCH__
     __shared__ ValueType sh_val;
     if(threadIdx.x == 0 && threadIdx.y == thread_id) {
       sh_val = value;
@@ -123,26 +138,17 @@ public:
     team_barrier();
     value = sh_val;
     team_barrier();
+    #endif
   }
 
-#ifdef KOKKOS_HAVE_CXX11
   template< class ValueType, class JoinOp >
-  __device__ inline
+  KOKKOS_INLINE_FUNCTION
   typename JoinOp::value_type team_reduce( const ValueType & value
-                                         , const JoinOp & op_in ) const
-    {
+                                         , const JoinOp & op_in ) const {
+      #ifdef __CUDA_ARCH__
       typedef JoinLambdaAdapter<ValueType,JoinOp> JoinOpFunctor ;
       const JoinOpFunctor op(op_in);
       ValueType * const base_data = (ValueType *) m_team_reduce ;
-#else
-  template< class JoinOp >
-  __device__ inline
-  typename JoinOp::value_type team_reduce( const typename JoinOp::value_type & value
-                                         , const JoinOp & op ) const
-    {
-      typedef JoinOp JoinOpFunctor ;
-      typename JoinOp::value_type * const base_data = (typename JoinOp::value_type *) m_team_reduce ;
-#endif
 
       __syncthreads(); // Don't write in to shared data until all threads have entered this function
 
@@ -153,6 +159,9 @@ public:
       Impl::cuda_intra_block_reduce_scan<false,JoinOpFunctor,void>( op , base_data );
 
       return base_data[ blockDim.y - 1 ];
+      #else
+      return typename JoinOp::value_type();
+      #endif
     }
 
   /** \brief  Intra-team exclusive prefix sum with team_rank() ordering
@@ -165,8 +174,8 @@ public:
    *  non-deterministic.
    */
   template< typename Type >
-  __device__ inline Type team_scan( const Type & value , Type * const global_accum ) const
-    {
+  KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const {
+      #ifdef __CUDA_ARCH__
       Type * const base_data = (Type *) m_team_reduce ;
 
       __syncthreads(); // Don't write in to shared data until all threads have entered this function
@@ -186,6 +195,9 @@ public:
       }
 
       return base_data[ threadIdx.y ];
+      #else
+      return Type();
+      #endif
     }
 
   /** \brief  Intra-team exclusive prefix sum with team_rank() ordering.
@@ -194,13 +206,14 @@ public:
    *    reduction_total = dev.team_scan( value ) + value ;
    */
   template< typename Type >
-  __device__ inline Type team_scan( const Type & value ) const
-    { return this->template team_scan<Type>( value , 0 ); }
+  KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const {
+    return this->template team_scan<Type>( value , 0 );
+  }
 
   //----------------------------------------
   // Private for the driver
 
-  __device__ inline
+  KOKKOS_INLINE_FUNCTION
   CudaTeamMember( void * shared
                 , const int shared_begin
                 , const int shared_size
@@ -210,51 +223,10 @@ public:
                 , const int arg_league_size )
     : m_team_reduce( shared )
     , m_team_shared( ((char *)shared) + shared_begin , shared_size,  scratch_level_1_ptr, scratch_level_1_size)
-    , m_league_rank( arg_league_rank ) 
-    , m_league_size( arg_league_size ) 
+    , m_league_rank( arg_league_rank )
+    , m_league_size( arg_league_size )
     {}
 
-#else
-
-  const execution_space::scratch_memory_space & team_shmem() const
-    { return m_team_shared.set_team_thread_mode(0, 1,0) ; }
-  const execution_space::scratch_memory_space & team_scratch(const int& level) const
-    { return m_team_shared.set_team_thread_mode(level,1,0) ; }
-  const execution_space::scratch_memory_space & thread_scratch(const int& level) const
-    { return m_team_shared.set_team_thread_mode(level,team_size(),team_rank()) ; }
-
-  int league_rank() const {return 0;}
-  int league_size() const {return 1;}
-  int team_rank() const {return 0;}
-  int team_size() const {return 1;}
-
-  void team_barrier() const {}
-  template<class ValueType>
-  void team_broadcast(ValueType& value, const int& thread_id) const {}
-
-  template< class JoinOp >
-  typename JoinOp::value_type team_reduce( const typename JoinOp::value_type & value
-                                         , const JoinOp & op ) const {return typename JoinOp::value_type();}
-
-  template< typename Type >
-  Type team_scan( const Type & value , Type * const global_accum ) const {return Type();}
-
-  template< typename Type >
-  Type team_scan( const Type & value ) const {return Type();}
-
-  //----------------------------------------
-  // Private for the driver
-
-  CudaTeamMember( void * shared
-                , const int shared_begin
-                , const int shared_end
-                , void*     scratch_level_1_ptr
-                , const int scratch_level_1_size
-                , const int arg_league_rank
-                , const int arg_league_size );
-
-#endif /* #if ! defined( __CUDA_ARCH__ ) */
-
 };
 
 } // namespace Impl
@@ -356,7 +328,7 @@ public:
     , m_vector_length( 0 )
     , m_team_scratch_size {0,0}
     , m_thread_scratch_size {0,0}
-    , m_chunk_size ( 32 ) 
+    , m_chunk_size ( 32 )
    {}
 
   /** \brief  Specify league size, request team size */
@@ -508,7 +480,7 @@ private:
   typedef typename Policy::work_tag     WorkTag ;
 
   const FunctorType  m_functor ;
-  const Policy       m_policy ;  
+  const Policy       m_policy ;
 
   ParallelFor() = delete ;
   ParallelFor & operator = ( const ParallelFor & ) = delete ;
@@ -638,8 +610,8 @@ public:
 
     }
 
-  ParallelFor( const FunctorType  & arg_functor 
-             , const Policy       & arg_policy 
+  ParallelFor( const FunctorType  & arg_functor
+             , const Policy       & arg_policy
              )
     : m_functor( arg_functor )
     , m_league_size( arg_policy.league_size() )
@@ -680,7 +652,7 @@ template< class FunctorType , class ReducerType, class ... Traits >
 class ParallelReduce< FunctorType
                     , Kokkos::RangePolicy< Traits ... >
                     , ReducerType
-                    , Kokkos::Cuda 
+                    , Kokkos::Cuda
                     >
 {
 private:
@@ -835,23 +807,22 @@ public:
       const int nwork = m_policy.end() - m_policy.begin();
       if ( nwork ) {
         const int block_size = local_block_size( m_functor );
-  
+
         m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ );
         m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) );
         m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) );
-  
+
         // REQUIRED ( 1 , N , 1 )
         const dim3 block( 1 , block_size , 1 );
         // Required grid.x <= block.y
         const dim3 grid( std::min( int(block.y) , int( ( nwork + block.y - 1 ) / block.y ) ) , 1 , 1 );
-  
+
       const int shmem = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( m_functor , block.y );
 
-  
       CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem ); // copy to device and execute
-  
+
       Cuda::fence();
-  
+
       if ( m_result_ptr ) {
         if ( m_unified_space ) {
           const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer)  );
@@ -871,8 +842,8 @@ public:
   }
 
   template< class HostViewType >
-  ParallelReduce( const FunctorType  & arg_functor 
-                , const Policy       & arg_policy 
+  ParallelReduce( const FunctorType  & arg_functor
+                , const Policy       & arg_policy
                 , const HostViewType & arg_result
                 , typename std::enable_if<
                    Kokkos::is_view< HostViewType >::value
@@ -925,7 +896,6 @@ private:
   typedef typename ValueTraits::reference_type  reference_type ;
   typedef typename ValueTraits::value_type      value_type ;
 
-
 public:
 
   typedef FunctorType      functor_type ;
@@ -937,7 +907,6 @@ private:
   typedef double DummyShflReductionType;
   typedef int DummySHMEMReductionType;
 
-
   // Algorithmic constraints: blockDim.y is a power of two AND blockDim.y == blockDim.z == 1
   // shared memory utilization:
   //
@@ -1058,36 +1027,44 @@ public:
   inline
   void execute()
     {
-      const int block_count = UseShflReduction? std::min( m_league_size , size_type(1024) )
-                                               :std::min( m_league_size , m_team_size );
+      const bool nwork = ( m_league_size != 0 ) && ( m_team_size != 0 ) ; // test the factors: their product can wrap around to zero for large leagues
+      if ( nwork ) {
+        const int block_count = UseShflReduction? std::min( m_league_size , size_type(1024) )
+          :std::min( m_league_size , m_team_size );
 
-      m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count );
-      m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) );
-      m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) );
+        m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count );
+        m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) );
+        m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) );
 
-      const dim3 block( m_vector_size , m_team_size , 1 );
-      const dim3 grid( block_count , 1 , 1 );
-      const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ;
+        const dim3 block( m_vector_size , m_team_size , 1 );
+        const dim3 grid( block_count , 1 , 1 );
+        const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ;
 
-      CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem_size_total ); // copy to device and execute
+        CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem_size_total ); // copy to device and execute
 
-      Cuda::fence();
+        Cuda::fence();
 
-      if ( m_result_ptr ) {
-        if ( m_unified_space ) {
-          const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) );
-          for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
+        if ( m_result_ptr ) {
+          if ( m_unified_space ) {
+            const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) );
+            for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
+          }
+          else {
+            const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) );
+            DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size );
+          }
         }
-        else {
-          const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) );
-          DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size );
+      }
+      else { // empty league or empty team: no kernel is launched
+        if (m_result_ptr) {
+          ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , m_result_ptr );
         }
       }
     }
 
   template< class HostViewType >
-  ParallelReduce( const FunctorType  & arg_functor 
-                , const Policy       & arg_policy 
+  ParallelReduce( const FunctorType  & arg_functor
+                , const Policy       & arg_policy
                 , const HostViewType & arg_result
                 , typename std::enable_if<
                                    Kokkos::is_view< HostViewType >::value
@@ -1106,9 +1083,18 @@ public:
   , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
       Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
                                                                arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
-      arg_policy.vector_length() )
+                                                               arg_policy.vector_length() )
   , m_vector_size( arg_policy.vector_length() )
-  , m_scratch_size{arg_policy.scratch_size(0,m_team_size),arg_policy.scratch_size(1,m_team_size)}
+  , m_scratch_size{
+    arg_policy.scratch_size(0,( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
+        Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
+                                                                 arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
+                                                                 arg_policy.vector_length() )
+    ), arg_policy.scratch_size(1,( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
+        Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
+                                                                 arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
+                                                                 arg_policy.vector_length() )
+        )}
   {
     // Return Init value if the number of worksets is zero
     if( arg_policy.league_size() == 0) {
@@ -1342,7 +1328,7 @@ private:
       }
 
       // Scan block values into locations shared_data[1..blockDim.y]
-      cuda_intra_block_reduce_scan<true,FunctorType,WorkTag>( m_functor , ValueTraits::pointer_type(shared_data+word_count.value) );
+      cuda_intra_block_reduce_scan<true,FunctorType,WorkTag>( m_functor , typename ValueTraits::pointer_type(shared_data+word_count.value) );
 
       {
         size_type * const block_total = shared_data + word_count.value * blockDim.y ;
@@ -1391,32 +1377,32 @@ public:
       const int nwork    = m_policy.end() - m_policy.begin();
       if ( nwork ) {
         enum { GridMaxComputeCapability_2x = 0x0ffff };
-  
+
         const int block_size = local_block_size( m_functor );
-  
+
         const int grid_max =
           ( block_size * block_size ) < GridMaxComputeCapability_2x ?
           ( block_size * block_size ) : GridMaxComputeCapability_2x ;
-  
+
         // At most 'max_grid' blocks:
         const int max_grid = std::min( int(grid_max) , int(( nwork + block_size - 1 ) / block_size ));
-  
+
         // How much work per block:
         const int work_per_block = ( nwork + max_grid - 1 ) / max_grid ;
-  
+
         // How many block are really needed for this much work:
         const int grid_x = ( nwork + work_per_block - 1 ) / work_per_block ;
-  
+
         m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * grid_x );
         m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 );
-  
+
         const dim3 grid( grid_x , 1 , 1 );
         const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 )
         const int shmem = ValueTraits::value_size( m_functor ) * ( block_size + 2 );
-  
+
         m_final = false ;
         CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute
-  
+
         m_final = true ;
         CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute
       }
@@ -1490,18 +1476,30 @@ namespace Impl {
 
 #ifdef __CUDA_ARCH__
     __device__ inline
-    ThreadVectorRangeBoundariesStruct (const CudaTeamMember& thread, const iType& count):
+    ThreadVectorRangeBoundariesStruct (const CudaTeamMember&, const iType& count): // team handle is unused; bound by reference so no copy is made
     start( threadIdx.x ),
     end( count ),
     increment( blockDim.x )
     {}
+    __device__ inline
+    ThreadVectorRangeBoundariesStruct (const iType& count):
+        start( threadIdx.x ),
+        end( count ),
+        increment( blockDim.x )
+     {}
 #else
     KOKKOS_INLINE_FUNCTION
-    ThreadVectorRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count):
+    ThreadVectorRangeBoundariesStruct (const CudaTeamMember&, const iType& count): // team handle is unused; bound by reference so no copy is made
       start( 0 ),
       end( count ),
       increment( 1 )
     {}
+    KOKKOS_INLINE_FUNCTION
+        ThreadVectorRangeBoundariesStruct (const iType& count):
+          start( 0 ),
+          end( count ),
+          increment( 1 )
+        {}
 #endif
     };
 
@@ -1509,22 +1507,24 @@ namespace Impl {
 
 template<typename iType>
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>
-  TeamThreadRange(const Impl::CudaTeamMember& thread, const iType& count) {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>(thread,count);
+Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >
+TeamThreadRange( const Impl::CudaTeamMember & thread, const iType & count ) {
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, count );
 }
 
-template<typename iType>
+template< typename iType1, typename iType2 >
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>
-  TeamThreadRange(const Impl::CudaTeamMember& thread, const iType& begin, const iType& end) {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>(thread,begin,end);
+Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
+                                       Impl::CudaTeamMember >
+TeamThreadRange( const Impl::CudaTeamMember & thread, const iType1 & begin, const iType2 & end ) {
+  typedef typename std::common_type< iType1, iType2 >::type iType;
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, iType(begin), iType(end) );
 }
 
 template<typename iType>
 KOKKOS_INLINE_FUNCTION
 Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >
-  ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) {
+ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) {
   return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >(thread,count);
 }
 
@@ -1571,9 +1571,10 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Cud
     lambda(i,result);
   }
 
-  Impl::cuda_intra_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) { dst+=src; });
-  Impl::cuda_inter_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) { dst+=src; });
-
+  Impl::cuda_intra_warp_reduction(result,[&] (ValueType& dst, const ValueType& src)
+      { dst+=src; });
+  Impl::cuda_inter_warp_reduction(result,[&] (ValueType& dst, const ValueType& src)
+      { dst+=src; });
 #endif
 }
 
@@ -1923,4 +1924,3 @@ namespace Impl {
 #endif /* defined( __CUDACC__ ) */
 
 #endif /* #ifndef KOKKOS_CUDA_PARALLEL_HPP */
-
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
index 1778f631c0ef07b2bad25ea2c855e65c258e6f57..f30a0a891f6a2d4d90666c56e50464229bd69d6e 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
@@ -139,6 +139,7 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
                                  typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type const result,
                                  Cuda::size_type * const m_scratch_flags,
                                  const int max_active_thread = blockDim.y) {
+#ifdef __CUDA_ARCH__
   typedef typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type pointer_type;
   typedef typename FunctorValueTraits< FunctorType , ArgTag >::value_type value_type;
 
@@ -213,6 +214,9 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
 
   //The last block has in its thread=0 the global reduction value through "value"
   return last_block;
+#else
+  return true;
+#endif
 }
 
 //----------------------------------------------------------------------------
@@ -290,10 +294,10 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
 
         if ( ! ( rtid_inter + n < blockDim.y ) ) n = 0 ;
 
-        BLOCK_SCAN_STEP(tdata_inter,n,8)
-        BLOCK_SCAN_STEP(tdata_inter,n,7)
-        BLOCK_SCAN_STEP(tdata_inter,n,6)
-        BLOCK_SCAN_STEP(tdata_inter,n,5)
+        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,8)
+        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,7)
+        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,6)
+        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,5)
       }
     }
   }
@@ -308,12 +312,19 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
             ( rtid_intra & 16 ) ? 16 : 0 ))));
 
     if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
-
+    #ifdef KOKKOS_CUDA_CLANG_WORKAROUND
+    BLOCK_SCAN_STEP(tdata_intra,n,4) __syncthreads();//__threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,3) __syncthreads();//__threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,2) __syncthreads();//__threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,1) __syncthreads();//__threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,0) __syncthreads();
+    #else
     BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block();
     BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block();
     BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block();
     BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block();
-    BLOCK_SCAN_STEP(tdata_intra,n,0)
+    BLOCK_SCAN_STEP(tdata_intra,n,0) __threadfence_block();
+    #endif
   }
 
 #undef BLOCK_SCAN_STEP
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp
index 701d267e1ba39413061afd337ac19c7d6acaacfc..d56de5db60ae71b34481752870c41fdc4d784cb1 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp
@@ -43,7 +43,7 @@
 
 #include <Kokkos_Core.hpp>
 
-#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
 
 #include <impl/Kokkos_TaskQueue_impl.hpp>
 
@@ -174,6 +174,6 @@ printf("cuda_task_queue_execute after\n");
 
 //----------------------------------------------------------------------------
 
-#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
 
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp
index 9d9347cc8d57c0c04a228fb0291c0f4e90b6243f..479294f3078a4e0d055610cb38b599415bbac921 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
@@ -44,7 +44,7 @@
 #ifndef KOKKOS_IMPL_CUDA_TASK_HPP
 #define KOKKOS_IMPL_CUDA_TASK_HPP
 
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_ENABLE_TASKDAG )
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
@@ -99,7 +99,7 @@ public:
 extern template class TaskQueue< Kokkos::Cuda > ;
 
 //----------------------------------------------------------------------------
-/**\brief  Impl::TaskExec<Cuda> is the TaskPolicy<Cuda>::member_type
+/**\brief  Impl::TaskExec<Cuda> is the TaskScheduler<Cuda>::member_type
  *         passed to tasks running in a Cuda space.
  *
  *  Cuda thread blocks for tasking are dimensioned:
@@ -234,19 +234,23 @@ namespace Kokkos {
 
 template<typename iType>
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >
-TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread
-               , const iType & count )
+Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >
+TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType & count )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >(thread,count);
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( thread, count );
 }
 
-template<typename iType>
+template<typename iType1, typename iType2> // begin and end may have different types
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >
-TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType & start , const iType & end )
+Impl::TeamThreadRangeBoundariesStruct
+  < typename std::common_type<iType1,iType2>::type
+  , Impl::TaskExec< Kokkos::Cuda > >
+TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread
+               , const iType1 & begin, const iType2 & end )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Cuda > >(thread,start,end);
+  typedef typename std::common_type< iType1, iType2 >::type iType; // index type of the returned range
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >(
+           thread, iType(begin), iType(end) ); // both bounds explicitly converted to iType
 }
 
 template<typename iType>
@@ -315,7 +319,7 @@ ValueType shfl_warp_broadcast
 }
 
 // all-reduce across corresponding vector lanes between team members within warp
-// assume vec_length*team_size == warp_size 
+// assume vec_length*team_size == warp_size
 // blockDim.x == vec_length == stride
 // blockDim.y == team_size
 // threadIdx.x == position in vec
@@ -344,7 +348,7 @@ void parallel_reduce
 
 // all-reduce across corresponding vector lanes between team members within warp
 // if no join() provided, use sum
-// assume vec_length*team_size == warp_size 
+// assume vec_length*team_size == warp_size
 // blockDim.x == vec_length == stride
 // blockDim.y == team_size
 // threadIdx.x == position in vec
@@ -372,7 +376,7 @@ void parallel_reduce
 }
 
 // all-reduce within team members within warp
-// assume vec_length*team_size == warp_size 
+// assume vec_length*team_size == warp_size
 // blockDim.x == vec_length == stride
 // blockDim.y == team_size
 // threadIdx.x == position in vec
@@ -397,7 +401,7 @@ void parallel_reduce
 
 // all-reduce within team members within warp
 // if no join() provided, use sum
-// assume vec_length*team_size == warp_size 
+// assume vec_length*team_size == warp_size
 // blockDim.x == vec_length == stride
 // blockDim.y == team_size
 // threadIdx.x == position in vec
@@ -426,7 +430,7 @@ void parallel_reduce
 }
 
 // scan across corresponding vector lanes between team members within warp
-// assume vec_length*team_size == warp_size 
+// assume vec_length*team_size == warp_size
 // blockDim.x == vec_length == stride
 // blockDim.y == team_size
 // threadIdx.x == position in vec
@@ -469,7 +473,7 @@ void parallel_scan
 }
 
 // scan within team member (vector) within warp
-// assume vec_length*team_size == warp_size 
+// assume vec_length*team_size == warp_size
 // blockDim.x == vec_length == stride
 // blockDim.y == team_size
 // threadIdx.x == position in vec
@@ -514,6 +518,6 @@ void parallel_scan
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
 #endif /* #ifndef KOKKOS_IMPL_CUDA_TASK_HPP */
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
deleted file mode 100644
index bb3cd2640d79ad980219861a6e4f0c233c0686bb..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
+++ /dev/null
@@ -1,932 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-// Experimental unified task-data parallel manycore LDRD
-
-#include <stdio.h>
-#include <iostream>
-#include <sstream>
-#include <Kokkos_Core.hpp>
-#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp>
-
-#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY )
-
-// #define DETAILED_PRINT
-
-//----------------------------------------------------------------------------
-
-#define QLOCK   reinterpret_cast<void*>( ~((uintptr_t)0) )
-#define QDENIED reinterpret_cast<void*>( ~((uintptr_t)0) - 1 )
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-void CudaTaskPolicyQueue::Destroy::destroy_shared_allocation()
-{
-  // Verify the queue is empty
-
-  if ( m_policy->m_count_ready ||
-       m_policy->m_team[0] ||
-       m_policy->m_team[1] ||
-       m_policy->m_team[2] ||
-       m_policy->m_serial[0] ||
-       m_policy->m_serial[1] ||
-       m_policy->m_serial[2] ) {
-    Kokkos::abort("CudaTaskPolicyQueue ERROR : Attempt to destroy non-empty queue" );
-  }
-
-  m_policy->~CudaTaskPolicyQueue();
-
-  Kokkos::Cuda::fence();
-}
-
-CudaTaskPolicyQueue::
-~CudaTaskPolicyQueue()
-{
-}
-
-CudaTaskPolicyQueue::
-CudaTaskPolicyQueue
-  ( const unsigned arg_task_max_count
-  , const unsigned arg_task_max_size
-  , const unsigned arg_task_default_dependence_capacity 
-  , const unsigned arg_team_size
-  )
-  : m_space( Kokkos::CudaUVMSpace()
-           , arg_task_max_size * arg_task_max_count * 1.2
-           , 16 /* log2(superblock size) */
-           )
-  , m_team { 0 , 0 , 0 }
-  , m_serial { 0 , 0 , 0 }
-  , m_team_size( 32 /* 1 warps */ )
-  , m_default_dependence_capacity( arg_task_default_dependence_capacity )
-  , m_count_ready(0)
-{
-  constexpr int max_team_size = 32 * 16 /* 16 warps */ ;
-
-  const int target_team_size =
-    std::min( int(arg_team_size) , max_team_size );
-
-  while ( m_team_size < target_team_size ) { m_team_size *= 2 ; }
-}
-
-//-----------------------------------------------------------------------
-// Called by each block & thread
-
-__device__
-void Kokkos::Experimental::Impl::CudaTaskPolicyQueue::driver()
-{
-  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
-
-#define IS_TEAM_LEAD ( threadIdx.x == 0 && threadIdx.y == 0 )
-
-#ifdef DETAILED_PRINT
-if ( IS_TEAM_LEAD ) {
-  printf( "CudaTaskPolicyQueue::driver() begin on %d with count %d\n"
-        , blockIdx.x , m_count_ready );
-}
-#endif
-
-  // Each thread block must iterate this loop synchronously
-  // to insure team-execution of team-task
-
-  __shared__ task_root_type * team_task ;
-
-  __syncthreads();
-
-  do {
-
-    if ( IS_TEAM_LEAD ) {
-      if ( 0 == m_count_ready ) {
-        team_task = q_denied ; // All queues are empty and no running tasks
-      }
-      else {
-        team_task = 0 ;
-        for ( int i = 0 ; i < int(NPRIORITY) && 0 == team_task ; ++i ) {
-          if ( ( i < 2 /* regular queue */ )
-               || ( ! m_space.is_empty() /* waiting for memory */ ) ) {
-            team_task = pop_ready_task( & m_team[i] );
-          }
-        }
-      }
-    }
-
-    __syncthreads();
-
-#ifdef DETAILED_PRINT
-if ( IS_TEAM_LEAD && 0 != team_task ) {
-  printf( "CudaTaskPolicyQueue::driver() (%d) team_task(0x%lx)\n"
-        , blockIdx.x
-        , (unsigned long) team_task );
-}
-#endif
-
-    // team_task == q_denied if all queues are empty
-    // team_task == 0 if no team tasks available
-
-    if ( q_denied != team_task ) {
-      if ( 0 != team_task ) {
-
-        Kokkos::Impl::CudaTeamMember
-          member( kokkos_impl_cuda_shared_memory<void>()
-                , 16                      /* shared_begin */
-                , team_task->m_shmem_size /* shared size */
-                , 0                       /* scratch level 1 pointer */
-                , 0                       /* scratch level 1 size */
-                , 0                       /* league rank */
-                , 1                       /* league size */
-                );
-
-        (*team_task->m_team)( team_task , member );
-
-        // A __synthreads was called and if completed the
-        // functor was destroyed.
-
-        if ( IS_TEAM_LEAD ) {
-          complete_executed_task( team_task );
-        }
-      }
-      else {
-        // One thread of one warp performs this serial task
-        if ( threadIdx.x == 0 &&
-             0 == ( threadIdx.y % 32 ) ) {
-          task_root_type * task = 0 ;
-          for ( int i = 0 ; i < int(NPRIORITY) && 0 == task ; ++i ) {
-            if ( ( i < 2 /* regular queue */ )
-                 || ( ! m_space.is_empty() /* waiting for memory */ ) ) {
-              task = pop_ready_task( & m_serial[i] );
-            }
-          }
-
-#ifdef DETAILED_PRINT
-if ( 0 != task ) {
-  printf( "CudaTaskPolicyQueue::driver() (%2d)(%d) single task(0x%lx)\n"
-        , blockIdx.x
-        , threadIdx.y
-        , (unsigned long) task );
-}
-#endif
-
-          if ( task ) {
-            (*task->m_serial)( task );
-            complete_executed_task( task );
-          }
-        }
-
-        __syncthreads();
-      }
-    }
-  } while ( q_denied != team_task );
-
-#ifdef DETAILED_PRINT
-if ( IS_TEAM_LEAD ) {
-  printf( "CudaTaskPolicyQueue::driver() end on %d with count %d\n"
-        , blockIdx.x , m_count_ready );
-}
-#endif
-
-#undef IS_TEAM_LEAD
-}
-
-//-----------------------------------------------------------------------
-
-__device__
-CudaTaskPolicyQueue::task_root_type *
-CudaTaskPolicyQueue::pop_ready_task(
-  CudaTaskPolicyQueue::task_root_type * volatile * const queue )
-{
-  task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK);
-  task_root_type * task = 0 ;
-  task_root_type * const task_claim = *queue ;
-
-  if ( ( q_lock != task_claim ) && ( 0 != task_claim ) ) {
-
-    // Queue is not locked and not null, try to claim head of queue.
-    // Is a race among threads to claim the queue.
-
-    if ( task_claim == atomic_compare_exchange(queue,task_claim,q_lock) ) {
-
-      // Aquired the task which must be in the waiting state.
-
-      const int claim_state =
-        atomic_compare_exchange( & task_claim->m_state
-                               , int(TASK_STATE_WAITING)
-                               , int(TASK_STATE_EXECUTING) );
-
-      task_root_type * lock_verify = 0 ;
-
-      if ( claim_state == int(TASK_STATE_WAITING) ) {
-
-        // Transitioned this task from waiting to executing
-        // Update the queue to the next entry and release the lock
-
-        task_root_type * const next =
-          *((task_root_type * volatile *) & task_claim->m_next );
-
-        *((task_root_type * volatile *) & task_claim->m_next ) = 0 ;
-
-        lock_verify = atomic_compare_exchange( queue , q_lock , next );
-      }
-
-      if ( ( claim_state != int(TASK_STATE_WAITING) ) |
-           ( q_lock != lock_verify ) ) {
-
-        printf( "CudaTaskPolicyQueue::pop_ready_task(0x%lx) task(0x%lx) state(%d) ERROR %s\n"
-              , (unsigned long) queue
-               , (unsigned long) task
-               , claim_state
-               , ( claim_state != int(TASK_STATE_WAITING)
-                 ? "NOT WAITING"
-                 : "UNLOCK" ) );
-        Kokkos::abort("CudaTaskPolicyQueue::pop_ready_task");
-      }
-
-      task = task_claim ;
-    }
-  }
-  return task ;
-}
-
-//-----------------------------------------------------------------------
-
-__device__
-void CudaTaskPolicyQueue::complete_executed_task(
-  CudaTaskPolicyQueue::task_root_type * task )
-{
-  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
-  
-
-#ifdef DETAILED_PRINT
-printf( "CudaTaskPolicyQueue::complete_executed_task(0x%lx) state(%d) (%d)(%d,%d)\n"
-      , (unsigned long) task
-      , task->m_state
-      , blockIdx.x
-      , threadIdx.x
-      , threadIdx.y
-      );
-#endif
-
-  // State is either executing or if respawned then waiting,
-  // try to transition from executing to complete.
-  // Reads the current value.
-  
-  const int state_old =
-    atomic_compare_exchange( & task->m_state
-                           , int(Kokkos::Experimental::TASK_STATE_EXECUTING)
-                           , int(Kokkos::Experimental::TASK_STATE_COMPLETE) );
-  
-  if ( int(Kokkos::Experimental::TASK_STATE_WAITING) == state_old ) {
-    /* Task requested a respawn so reschedule it */
-    schedule_task( task , false /* not initial spawn */ );
-  }
-  else if ( int(Kokkos::Experimental::TASK_STATE_EXECUTING) == state_old ) {
-    /* Task is complete */
-
-    // Clear dependences of this task before locking wait queue
-    
-    task->clear_dependence();
-    
-    // Stop other tasks from adding themselves to this task's wait queue.
-    // The wait queue is updated concurrently so guard with an atomic.
-    
-    task_root_type * wait_queue     = *((task_root_type * volatile *) & task->m_wait );
-    task_root_type * wait_queue_old = 0 ;
-    
-    do {
-      wait_queue_old = wait_queue ;
-      wait_queue     = atomic_compare_exchange( & task->m_wait , wait_queue_old , q_denied ); 
-    } while ( wait_queue_old != wait_queue );
-    
-    // The task has been removed from ready queue and
-    // execution is complete so decrement the reference count.
-    // The reference count was incremented by the initial spawning.
-    // The task may be deleted if this was the last reference.
-
-    task_root_type::assign( & task , 0 );
-
-    // Pop waiting tasks and schedule them
-    while ( wait_queue ) {
-      task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ;
-      schedule_task( x , false /* not initial spawn */ );
-    }
-  }
-  else {
-    printf( "CudaTaskPolicyQueue::complete_executed_task(0x%lx) ERROR state_old(%d) dep_size(%d)\n"
-           , (unsigned long)( task )
-           , int(state_old)
-           , task->m_dep_size
-           );
-    Kokkos::abort("CudaTaskPolicyQueue::complete_executed_task" );
-  }
-  
-  // If the task was respawned it may have already been
-  // put in a ready queue and the count incremented.
-  // By decrementing the count last it will never go to zero
-  // with a ready or executing task.
-  
-  atomic_fetch_add( & m_count_ready , -1 );
-}
-
-__device__
-void TaskMember< Kokkos::Cuda , void , void >::latch_add( const int k )
-{
-  typedef TaskMember< Kokkos::Cuda , void , void >  task_root_type ;
-
-  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
-
-  const bool ok_input = 0 < k ;
-
-  const int count = ok_input ? atomic_fetch_add( & m_dep_size , -k ) - k 
-                             : k ;
-
-  const bool ok_count = 0 <= count ;
-
-  const int state = 0 != count ? TASK_STATE_WAITING :
-    atomic_compare_exchange( & m_state
-                           , TASK_STATE_WAITING
-                           , TASK_STATE_COMPLETE );
-
-  const bool ok_state = state == TASK_STATE_WAITING ;
-
-  if ( ! ok_count || ! ok_state ) {
-    printf( "CudaTaskPolicyQueue::latch_add[0x%lx](%d) ERROR %s %d\n"
-          , (unsigned long) this
-          , k
-          , ( ! ok_input ? "Non-positive input" :
-            ( ! ok_count ? "Negative count" : "Bad State" ) )
-          , ( ! ok_input ? k :
-            ( ! ok_count ? count : state ) )
-          );
-    Kokkos::abort( "CudaTaskPolicyQueue::latch_add ERROR" );
-  }
-  else if ( 0 == count ) {
-    // Stop other tasks from adding themselves to this latch's wait queue.
-    // The wait queue is updated concurrently so guard with an atomic.
-
-    CudaTaskPolicyQueue & policy    = *m_policy ;
-    task_root_type * wait_queue     = *((task_root_type * volatile *) &m_wait);
-    task_root_type * wait_queue_old = 0 ;
-    
-    do {
-      wait_queue_old = wait_queue ;
-      wait_queue     = atomic_compare_exchange( & m_wait , wait_queue_old , q_denied ); 
-    } while ( wait_queue_old != wait_queue );
-    
-    // Pop waiting tasks and schedule them
-    while ( wait_queue ) {
-      task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ;
-      policy.schedule_task( x , false /* not initial spawn */ );
-    }
-  }
-}
-
-//----------------------------------------------------------------------------
-
-void CudaTaskPolicyQueue::reschedule_task(
-  CudaTaskPolicyQueue::task_root_type * const task )
-{
-  // Reschedule transitions from executing back to waiting.
-  const int old_state =
-    atomic_compare_exchange( & task->m_state
-                           , int(TASK_STATE_EXECUTING)
-                           , int(TASK_STATE_WAITING) );
-
-  if ( old_state != int(TASK_STATE_EXECUTING) ) {
-
-    printf( "CudaTaskPolicyQueue::reschedule_task(0x%lx) ERROR state(%d)\n"
-          , (unsigned long) task
-          , old_state
-          );
-    Kokkos::abort("CudaTaskPolicyQueue::reschedule" );
-  }
-}
-
-KOKKOS_FUNCTION
-void CudaTaskPolicyQueue::schedule_task(
-  CudaTaskPolicyQueue::task_root_type * const task ,
-  const bool initial_spawn )
-{
-  task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK);
-  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
-
-  //----------------------------------------
-  // State is either constructing or already waiting.
-  // If constructing then transition to waiting.
-
-  {
-    const int old_state = atomic_compare_exchange( & task->m_state
-                                                 , int(TASK_STATE_CONSTRUCTING)
-                                                 , int(TASK_STATE_WAITING) );
-
-    // Head of linked list of tasks waiting on this task
-    task_root_type * const waitTask =
-      *((task_root_type * volatile const *) & task->m_wait );
-
-    // Member of linked list of tasks waiting on some other task
-    task_root_type * const next =
-      *((task_root_type * volatile const *) & task->m_next );
-
-    // An incomplete and non-executing task has:
-    //   task->m_state == TASK_STATE_CONSTRUCTING or TASK_STATE_WAITING
-    //   task->m_wait  != q_denied
-    //   task->m_next  == 0
-    //
-    if ( ( q_denied == waitTask ) ||
-         ( 0 != next ) ||
-         ( old_state != int(TASK_STATE_CONSTRUCTING) &&
-           old_state != int(TASK_STATE_WAITING) ) ) {
-      printf( "CudaTaskPolicyQueue::schedule_task(0x%lx) STATE ERROR: state(%d) wait(0x%lx) next(0x%lx)\n"
-            , (unsigned long) task
-            , old_state
-            , (unsigned long) waitTask
-            , (unsigned long) next );
-      Kokkos::abort("CudaTaskPolicyQueue::schedule" );
-    }
-  }
-
-  //----------------------------------------
-
-  if ( initial_spawn ) {
-    // The initial spawn of a task increments the reference count
-    // for the task's existence in either a waiting or ready queue
-    // until the task has completed.
-    // Completing the task's execution is the matching
-    // decrement of the reference count.
-    task_root_type::assign( 0 , task );
-  }
-
-  //----------------------------------------
-  // Insert this task into a dependence task that is not complete.
-  // Push on to that task's wait queue.
-
-  bool attempt_insert_in_queue = true ;
-
-  task_root_type * volatile * queue =
-    task->m_dep_size ? & task->m_dep[0]->m_wait : (task_root_type **) 0 ;
-
-  for ( int i = 0 ; attempt_insert_in_queue && ( 0 != queue ) ; ) {
-
-    task_root_type * const head_value_old = *queue ;
-
-    if ( q_denied == head_value_old ) {
-      // Wait queue is closed because task is complete,
-      // try again with the next dependence wait queue.
-      ++i ;
-      queue = i < task->m_dep_size ? & task->m_dep[i]->m_wait
-                                   : (task_root_type **) 0 ;
-    }
-    else {
-
-      // Wait queue is open and not denied.
-      // Have exclusive access to this task.
-      // Assign m_next assuming a successfull insertion into the queue.
-      // Fence the memory assignment before attempting the CAS.
-
-      *((task_root_type * volatile *) & task->m_next ) = head_value_old ;
-
-      memory_fence();
-
-      // Attempt to insert this task into the queue.
-      // If fails then continue the attempt.
-
-      attempt_insert_in_queue =
-        head_value_old != atomic_compare_exchange(queue,head_value_old,task);
-    }
-  }
-
-  //----------------------------------------
-  // All dependences are complete, insert into the ready list
-
-  if ( attempt_insert_in_queue ) {
-
-    // Increment the count of ready tasks.
-    // Count will be decremented when task is complete.
-
-    atomic_fetch_add( & m_count_ready , 1 );
-
-    queue = task->m_queue ;
-
-    while ( attempt_insert_in_queue ) {
-
-      // A locked queue is being popped.
-
-      task_root_type * const head_value_old = *queue ;
-
-      if ( q_lock != head_value_old ) {
-        // Read the head of ready queue,
-        // if same as previous value then CAS locks the ready queue
-
-        // Have exclusive access to this task,
-        // assign to head of queue, assuming successful insert
-        // Fence assignment before attempting insert.
-        *((task_root_type * volatile *) & task->m_next ) = head_value_old ;
-
-        memory_fence();
-
-        attempt_insert_in_queue =
-          head_value_old != atomic_compare_exchange(queue,head_value_old,task);
-      }
-    }
-  }
-}
-
-void CudaTaskPolicyQueue::deallocate_task
-  ( CudaTaskPolicyQueue::task_root_type * const task )
-{
-  m_space.deallocate( task , task->m_size_alloc );
-}
-
-KOKKOS_FUNCTION
-CudaTaskPolicyQueue::task_root_type *
-CudaTaskPolicyQueue::allocate_task
-  ( const unsigned arg_sizeof_task
-  , const unsigned arg_dep_capacity
-  , const unsigned arg_team_shmem
-  )
-{
-  const unsigned base_size = arg_sizeof_task +
-    ( arg_sizeof_task % sizeof(task_root_type*)
-    ? sizeof(task_root_type*) - arg_sizeof_task % sizeof(task_root_type*)
-    : 0 );
-
-  const unsigned dep_capacity
-    = ~0u == arg_dep_capacity
-    ? m_default_dependence_capacity
-    : arg_dep_capacity ;
-
-  const unsigned size_alloc =
-     base_size + sizeof(task_root_type*) * dep_capacity ;
-
-  task_root_type * const task =
-    reinterpret_cast<task_root_type*>( m_space.allocate( size_alloc ) );
-
-  if ( task != 0 ) {
-
-    // Initialize task's root and value data structure
-    // Calling function must copy construct the functor.
-
-    new( (void*) task ) task_root_type();
-
-    task->m_policy       = this ;
-    task->m_size_alloc   = size_alloc ;
-    task->m_dep_capacity = dep_capacity ;
-    task->m_shmem_size   = arg_team_shmem ;
-
-    if ( dep_capacity ) {
-      task->m_dep =
-        reinterpret_cast<task_root_type**>(
-        reinterpret_cast<unsigned char*>(task) + base_size );
-
-      for ( unsigned i = 0 ; i < dep_capacity ; ++i )
-        task->task_root_type::m_dep[i] = 0 ;
-    }
-  }
-  return  task ;
-}
-
-//----------------------------------------------------------------------------
-
-void CudaTaskPolicyQueue::add_dependence
-  ( CudaTaskPolicyQueue::task_root_type * const after
-  , CudaTaskPolicyQueue::task_root_type * const before
-  )
-{
-  if ( ( after != 0 ) && ( before != 0 ) ) {
-
-    int const state = *((volatile const int *) & after->m_state );
-
-    // Only add dependence during construction or during execution.
-    // Both tasks must have the same policy.
-    // Dependence on non-full memory cannot be mixed with any other dependence.
-
-    const bool ok_state =
-      Kokkos::Experimental::TASK_STATE_CONSTRUCTING == state ||
-      Kokkos::Experimental::TASK_STATE_EXECUTING    == state ;
-
-    const bool ok_capacity =
-      after->m_dep_size < after->m_dep_capacity ;
-
-    const bool ok_policy =
-      after->m_policy == this && before->m_policy == this ;
-
-    if ( ok_state && ok_capacity && ok_policy ) {
-
-      ++after->m_dep_size ;
-
-      task_root_type::assign( after->m_dep + (after->m_dep_size-1) , before );
-
-      memory_fence();
-    }
-    else {
-
-printf( "CudaTaskPolicyQueue::add_dependence( 0x%lx , 0x%lx ) ERROR %s\n"
-      , (unsigned long) after
-      , (unsigned long) before
-      , ( ! ok_state    ? "Task not constructing or executing" :
-        ( ! ok_capacity ? "Task Exceeded dependence capacity"
-                        : "Tasks from different policies" )) );
-
-      Kokkos::abort("CudaTaskPolicyQueue::add_dependence ERROR");
-    }
-  }
-}
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-
-TaskPolicy< Kokkos::Cuda >::TaskPolicy
-  ( const unsigned arg_task_max_count
-  , const unsigned arg_task_max_size
-  , const unsigned arg_task_default_dependence_capacity
-  , const unsigned arg_task_team_size
-  )
-  : m_track()
-  , m_policy(0)
-{
-  // Allocate the queue data sructure in UVM space
-
-  typedef Kokkos::Experimental::Impl::SharedAllocationRecord
-    < Kokkos::CudaUVMSpace , Impl::CudaTaskPolicyQueue::Destroy > record_type ;
-
-  record_type * record =
-    record_type::allocate( Kokkos::CudaUVMSpace()
-                         , "CudaUVM task queue"
-                         , sizeof(Impl::CudaTaskPolicyQueue)
-                         );
-
-  m_policy = reinterpret_cast< Impl::CudaTaskPolicyQueue * >( record->data() );
-
-  // Tasks are allocated with application's task size + sizeof(task_root_type)
-
-  const size_t full_task_size_estimate =
-    arg_task_max_size +
-    sizeof(task_root_type) +
-    sizeof(task_root_type*) * arg_task_default_dependence_capacity ;
-
-  new( m_policy )
-    Impl::CudaTaskPolicyQueue( arg_task_max_count
-                             , full_task_size_estimate
-                             , arg_task_default_dependence_capacity
-                             , arg_task_team_size );
-
-  record->m_destroy.m_policy = m_policy ;
-  
-  m_track.assign_allocated_record_to_uninitialized( record );
-}
-
-__global__
-static void kokkos_cuda_task_policy_queue_driver
-  ( Kokkos::Experimental::Impl::CudaTaskPolicyQueue * queue )
-{
-  queue->driver();
-}
-
-void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Cuda > & policy )
-{
-  const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 );
-  const dim3 block( 1 , policy.m_policy->m_team_size , 1 );
-
-  const int shared = 0 ; // Kokkos::Impl::CudaTraits::SharedMemoryUsage / 2 ;
-  const cudaStream_t stream = 0 ;
-
-
-#ifdef DETAILED_PRINT
-printf("kokkos_cuda_task_policy_queue_driver grid(%d,%d,%d) block(%d,%d,%d) shared(%d) policy(0x%lx)\n"
-      , grid.x , grid.y , grid.z
-      , block.x , block.y , block.z
-      , shared
-      , (unsigned long)( policy.m_policy ) );
-fflush(stdout);
-#endif
-
-  CUDA_SAFE_CALL( cudaDeviceSynchronize() );
-
-/*
-  CUDA_SAFE_CALL(
-    cudaFuncSetCacheConfig( kokkos_cuda_task_policy_queue_driver
-                          , cudaFuncCachePreferL1 ) );
-
-  CUDA_SAFE_CALL( cudaGetLastError() );
-*/
-
-  kokkos_cuda_task_policy_queue_driver<<< grid , block , shared , stream >>>
-    ( policy.m_policy );
- 
-  CUDA_SAFE_CALL( cudaGetLastError() );
-
-  CUDA_SAFE_CALL( cudaDeviceSynchronize() );
-
-#ifdef DETAILED_PRINT
-printf("kokkos_cuda_task_policy_queue_driver end\n");
-fflush(stdout);
-#endif
-
-}
-
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-typedef TaskMember< Kokkos::Cuda , void , void > Task ;
-
-__host__ __device__
-Task::~TaskMember()
-{
-}
-
-__host__ __device__
-void Task::assign( Task ** const lhs_ptr , Task * rhs )
-{
-  Task * const q_denied = reinterpret_cast<Task*>(QDENIED);
-
-  // Increment rhs reference count.
-  if ( rhs ) { atomic_fetch_add( & rhs->m_ref_count , 1 ); }
-
-  if ( 0 == lhs_ptr ) return ;
-
-  // Must have exclusive access to *lhs_ptr.
-  // Assign the pointer and retrieve the previous value.
-  // Cannot use atomic exchange since *lhs_ptr may be
-  // in Cuda register space.
-
-#if 0
-
-  Task * const old_lhs = *((Task*volatile*)lhs_ptr);
-
-  *((Task*volatile*)lhs_ptr) = rhs ;
-
-  Kokkos::memory_fence();
-
-#else
-
-  Task * const old_lhs = *lhs_ptr ;
-
-  *lhs_ptr = rhs ;
-
-#endif
-
-  if ( old_lhs && rhs && old_lhs->m_policy != rhs->m_policy ) {
-    Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Cuda>::assign ERROR different queues");
-  }
-
-  if ( old_lhs ) {
-
-    Kokkos::memory_fence();
-
-    // Decrement former lhs reference count.
-    // If reference count is zero task must be complete, then delete task.
-    // Task is ready for deletion when  wait == q_denied
-
-    int const count = atomic_fetch_add( & (old_lhs->m_ref_count) , -1 ) - 1 ;
-    int const state = old_lhs->m_state ;
-    Task * const wait = *((Task * const volatile *) & old_lhs->m_wait );
-
-    const bool ok_count = 0 <= count ;
-
-    // If count == 0 then will be deleting
-    // and must either be constructing or complete.
-    const bool ok_state = 0 < count ? true :
-      ( ( state == int(TASK_STATE_CONSTRUCTING) && wait == 0 ) ||
-        ( state == int(TASK_STATE_COMPLETE)     && wait == q_denied ) )
-      &&
-     old_lhs->m_next == 0 &&
-     old_lhs->m_dep_size == 0 ;
-
-    if ( ! ok_count || ! ok_state ) {
-
-      printf( "%s Kokkos::Impl::TaskManager<Kokkos::Cuda>::assign ERROR deleting task(0x%lx) m_ref_count(%d) m_state(%d) m_wait(0x%ld)\n"
-#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_CUDA )
-            , "CUDA "
-#else
-            , "HOST "
-#endif
-            , (unsigned long) old_lhs
-            , count
-            , state
-            , (unsigned long) wait );
-      Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Cuda>::assign ERROR deleting");
-    }
-
-    if ( count == 0 ) {
-      // When 'count == 0' this thread has exclusive access to 'old_lhs'
-
-#ifdef DETAILED_PRINT
-printf( "Task::assign(...) old_lhs(0x%lx) deallocate\n"
-      , (unsigned long) old_lhs
-      );
-#endif
-
-      old_lhs->m_policy->deallocate_task( old_lhs );
-    }
-  }
-}
-
-//----------------------------------------------------------------------------
-
-__device__
-int Task::get_dependence() const
-{
-  return m_dep_size ;
-}
-
-__device__
-Task * Task::get_dependence( int i ) const
-{
-  Task * const t = ((Task*volatile*)m_dep)[i] ;
-
-  if ( Kokkos::Experimental::TASK_STATE_EXECUTING != m_state || i < 0 || m_dep_size <= i || 0 == t ) {
-
-printf( "TaskMember< Cuda >::get_dependence ERROR : task[%lx]{ state(%d) dep_size(%d) dep[%d] = %lx }\n"
-      , (unsigned long) this
-      , m_state
-      , m_dep_size
-      , i
-      , (unsigned long) t
-      );
-
-    Kokkos::abort("TaskMember< Cuda >::get_dependence ERROR");
-  }
-
-  return t ;
-}
-
-//----------------------------------------------------------------------------
-
-__device__ __host__
-void Task::clear_dependence()
-{
-  for ( int i = m_dep_size - 1 ; 0 <= i ; --i ) {
-    assign( m_dep + i , 0 );
-  }
-
-  *((volatile int *) & m_dep_size ) = 0 ;
-
-  memory_fence();
-}
-
-//----------------------------------------------------------------------------
-
-
-//----------------------------------------------------------------------------
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-
-#endif  /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
-
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp
deleted file mode 100644
index e71512f0391b3e264341222b82918d9901080061..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp
+++ /dev/null
@@ -1,833 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-// Experimental unified task-data parallel manycore LDRD
-
-#ifndef KOKKOS_CUDA_TASKPOLICY_HPP
-#define KOKKOS_CUDA_TASKPOLICY_HPP
-
-#include <Kokkos_Core_fwd.hpp>
-#include <Kokkos_Cuda.hpp>
-#include <Kokkos_TaskPolicy.hpp>
-
-#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY )
-
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-struct CudaTaskPolicyQueue ;
-
-/** \brief  Base class for all Kokkos::Cuda tasks */
-template<>
-class TaskMember< Kokkos::Cuda , void , void > {
-public:
-
-  template< class > friend class Kokkos::Experimental::TaskPolicy ;
-  friend struct CudaTaskPolicyQueue ;
-
-  typedef void (* function_single_type) ( TaskMember * );
-  typedef void (* function_team_type)   ( TaskMember * , Kokkos::Impl::CudaTeamMember & );
-
-private:
-
-  CudaTaskPolicyQueue   * m_policy ;
-  TaskMember * volatile * m_queue ;
-  function_team_type      m_team ;    ///< Apply function on CUDA
-  function_single_type    m_serial ;  ///< Apply function on CUDA
-  TaskMember **           m_dep ;     ///< Dependences
-  TaskMember *            m_wait ;    ///< Linked list of tasks waiting on this task
-  TaskMember *            m_next ;    ///< Linked list of tasks waiting on a different task
-  int                    m_dep_capacity ; ///< Capacity of dependences
-  int                    m_dep_size ;     ///< Actual count of dependences
-  int                    m_size_alloc ;
-  int                    m_shmem_size ;
-  int                    m_ref_count ;    ///< Reference count
-  int                    m_state ;        ///< State of the task
-
-
-  TaskMember( TaskMember && ) = delete ;
-  TaskMember( const TaskMember & ) = delete ;
-  TaskMember & operator = ( TaskMember && ) = delete ;
-  TaskMember & operator = ( const TaskMember & ) = delete ;
-
-protected:
-
-  KOKKOS_INLINE_FUNCTION
-  TaskMember()
-    : m_policy(0)
-    , m_queue(0)
-    , m_team(0)
-    , m_serial(0)
-    , m_dep(0)
-    , m_wait(0)
-    , m_next(0)
-    , m_size_alloc(0)
-    , m_dep_capacity(0)
-    , m_dep_size(0)
-    , m_shmem_size(0)
-    , m_ref_count(0)
-    , m_state( TASK_STATE_CONSTRUCTING )
-    {}
-
-public:
-
-  KOKKOS_FUNCTION
-  ~TaskMember();
-
-  KOKKOS_INLINE_FUNCTION
-  int reference_count() const
-    { return *((volatile int *) & m_ref_count ); }
-
-  // Cannot use the function pointer to verify the type
-  // since the function pointer is not unique between
-  // Host and Cuda. Don't run verificaton for Cuda. 
-  // Assume testing on Host-only back-end will catch such errors.
-
-  template< typename ResultType >
-  KOKKOS_INLINE_FUNCTION static
-  TaskMember * verify_type( TaskMember * t ) { return t ; }
-
-  //----------------------------------------
-  /*  Inheritence Requirements on task types:
-   *
-   *    class DerivedTaskType
-   *      : public TaskMember< Cuda , DerivedType::value_type , FunctorType >
-   *      { ... };
-   *
-   *    class TaskMember< Cuda , DerivedType::value_type , FunctorType >
-   *      : public TaskMember< Cuda , DerivedType::value_type , void >
-   *      , public Functor
-   *      { ... };
-   *
-   *  If value_type != void
-   *    class TaskMember< Cuda , value_type , void >
-   *      : public TaskMember< Cuda , void , void >
-   *
-   *  Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ]
-   *
-   */
-  //----------------------------------------
-  // If after the 'apply' the task's state is waiting 
-  // then it will be rescheduled and called again.
-  // Otherwise the functor must be destroyed.
-
-  template< class DerivedTaskType , class Tag >
-  __device__ static
-  void apply_single(
-    typename std::enable_if
-      <( std::is_same< Tag , void >::value &&
-        std::is_same< typename DerivedTaskType::result_type , void >::value
-       ), TaskMember * >::type t )
-    {
-      typedef typename DerivedTaskType::functor_type  functor_type ;
-
-      functor_type * const f =
-        static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) );
-
-      f->apply();
-
-      if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
-        f->~functor_type();
-      }
-    }
-
-  template< class DerivedTaskType , class Tag >
-  __device__ static
-  void apply_single(
-    typename std::enable_if
-      <( std::is_same< Tag , void >::value &&
-        ! std::is_same< typename DerivedTaskType::result_type , void >::value
-       ), TaskMember * >::type t )
-    {
-      typedef typename DerivedTaskType::functor_type  functor_type ;
-
-      DerivedTaskType * const self = static_cast< DerivedTaskType * >(t);
-      functor_type    * const f    = static_cast< functor_type * >( self );
-
-      f->apply( self->m_result );
-
-      if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
-        f->~functor_type();
-      }
-    }
-
-  template< class DerivedTaskType , class Tag >
-  __device__
-  void set_apply_single()
-    {
-      m_serial = & TaskMember::template apply_single<DerivedTaskType,Tag> ;
-    }
-
-  //----------------------------------------
-
-  template< class DerivedTaskType , class Tag >
-  __device__ static
-  void apply_team(
-    typename std::enable_if
-      <( std::is_same<Tag,void>::value &&
-         std::is_same<typename DerivedTaskType::result_type,void>::value
-       ), TaskMember * >::type t
-    , Kokkos::Impl::CudaTeamMember & member
-    )
-    {
-      typedef typename DerivedTaskType::functor_type functor_type ;
-
-      functor_type * const f =
-        static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) );
-
-      f->apply( member );
-
-      __syncthreads(); // Wait for team to finish calling function
-
-      if ( threadIdx.x == 0 &&
-           threadIdx.y == 0 &&
-           t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
-        f->~functor_type();
-      }
-    }
-
-  template< class DerivedTaskType , class Tag >
-  __device__ static
-  void apply_team(
-    typename std::enable_if
-      <( std::is_same<Tag,void>::value &&
-         ! std::is_same<typename DerivedTaskType::result_type,void>::value
-       ), TaskMember * >::type t
-    , Kokkos::Impl::CudaTeamMember & member
-    )
-    {
-      typedef typename DerivedTaskType::functor_type  functor_type ;
-
-      DerivedTaskType * const self = static_cast< DerivedTaskType * >(t);
-      functor_type    * const f    = static_cast< functor_type * >( self );
-
-      f->apply( member , self->m_result );
-
-      __syncthreads(); // Wait for team to finish calling function
-
-      if ( threadIdx.x == 0 &&
-           threadIdx.y == 0 &&
-           t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
-        f->~functor_type();
-      }
-    }
-
-  template< class DerivedTaskType , class Tag >
-  __device__
-  void set_apply_team()
-    {
-      m_team = & TaskMember::template apply_team<DerivedTaskType,Tag> ;
-    }
-
-  //----------------------------------------
-
-  KOKKOS_FUNCTION static
-  void assign( TaskMember ** const lhs , TaskMember * const rhs );
-
-  __device__
-  TaskMember * get_dependence( int i ) const ;
-
-  __device__
-  int get_dependence() const ;
-
-  KOKKOS_FUNCTION void clear_dependence();
-
-  __device__
-  void latch_add( const int k );
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION static
-  void construct_result( TaskMember * const ) {}
-
-  typedef FutureValueTypeIsVoidError get_result_type ;
-
-  KOKKOS_INLINE_FUNCTION
-  get_result_type get() const { return get_result_type() ; }
-
-  KOKKOS_INLINE_FUNCTION
-  Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); }
-
-};
-
-/** \brief  A Future< Kokkos::Cuda , ResultType > will cast
- *          from  TaskMember< Kokkos::Cuda , void , void >
- *          to    TaskMember< Kokkos::Cuda , ResultType , void >
- *          to query the result.
- */
-template< class ResultType >
-class TaskMember< Kokkos::Cuda , ResultType , void >
-  : public TaskMember< Kokkos::Cuda , void , void >
-{
-public:
-
-  typedef ResultType result_type ;
-
-  result_type  m_result ;
-
-  typedef const result_type & get_result_type ;
-
-  KOKKOS_INLINE_FUNCTION
-  get_result_type get() const { return m_result ; }
-
-  KOKKOS_INLINE_FUNCTION static
-  void construct_result( TaskMember * const ptr )
-    {
-      new((void*)(& ptr->m_result)) result_type();
-    }
-
-  TaskMember() = delete ;
-  TaskMember( TaskMember && ) = delete ;
-  TaskMember( const TaskMember & ) = delete ;
-  TaskMember & operator = ( TaskMember && ) = delete ;
-  TaskMember & operator = ( const TaskMember & ) = delete ;
-};
-
-/** \brief  Callback functions will cast
- *          from  TaskMember< Kokkos::Cuda , void , void >
- *          to    TaskMember< Kokkos::Cuda , ResultType , FunctorType >
- *          to execute work functions.
- */
-template< class ResultType , class FunctorType >
-class TaskMember< Kokkos::Cuda , ResultType , FunctorType >
-  : public TaskMember< Kokkos::Cuda , ResultType , void >
-  , public FunctorType
-{
-public:
-  typedef ResultType   result_type ;
-  typedef FunctorType  functor_type ;
-
-  KOKKOS_INLINE_FUNCTION static
-  void copy_construct( TaskMember * const ptr
-                     , const functor_type & arg_functor )
-    {
-      typedef TaskMember< Kokkos::Cuda , ResultType , void > base_type ;
-
-      new((void*)static_cast<FunctorType*>(ptr)) functor_type( arg_functor );
-
-      base_type::construct_result( static_cast<base_type*>( ptr ) );
-    }
-
-  TaskMember() = delete ;
-  TaskMember( TaskMember && ) = delete ;
-  TaskMember( const TaskMember & ) = delete ;
-  TaskMember & operator = ( TaskMember && ) = delete ;
-  TaskMember & operator = ( const TaskMember & ) = delete ;
-};
-
-//----------------------------------------------------------------------------
-
-namespace {
-
-template< class DerivedTaskType , class Tag >
-__global__
-void cuda_set_apply_single( DerivedTaskType * task )
-{
-  typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Cuda , void , void >
-    task_root_type ;
-
-  task->task_root_type::template set_apply_single< DerivedTaskType , Tag >();
-}
-
-template< class DerivedTaskType , class Tag >
-__global__
-void cuda_set_apply_team( DerivedTaskType * task )
-{
-  typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Cuda , void , void >
-    task_root_type ;
-
-  task->task_root_type::template set_apply_team< DerivedTaskType , Tag >();
-}
-
-} /* namespace */
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-struct CudaTaskPolicyQueue {
-
-  enum { NPRIORITY = 3 };
-
-  // Must use UVM so that tasks can be created in both
-  // Host and Cuda space.
-
-  typedef Kokkos::Experimental::MemoryPool< Kokkos::CudaUVMSpace >
-    memory_space ;
-
-  typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Cuda , void , void >
-    task_root_type ;
-
-  memory_space     m_space ;
-  task_root_type * m_team[ NPRIORITY ] ;
-  task_root_type * m_serial[ NPRIORITY ];
-  int              m_team_size ;
-  int              m_default_dependence_capacity ;
-  int volatile     m_count_ready ; ///< Ready plus executing tasks
-
-  // Execute tasks until all non-waiting tasks are complete
-  __device__
-  void driver();
-
-  __device__ static
-  task_root_type * pop_ready_task( task_root_type * volatile * const queue );
-
-  // When a task finishes executing.
-  __device__
-  void complete_executed_task( task_root_type * );
-
-  KOKKOS_FUNCTION void schedule_task( task_root_type * const 
-                                    , const bool initial_spawn = true );
-  KOKKOS_FUNCTION void reschedule_task( task_root_type * const );
-  KOKKOS_FUNCTION
-  void add_dependence( task_root_type * const after
-                     , task_root_type * const before );
-
-
-  CudaTaskPolicyQueue() = delete ;
-  CudaTaskPolicyQueue( CudaTaskPolicyQueue && ) = delete ;
-  CudaTaskPolicyQueue( const CudaTaskPolicyQueue & ) = delete ;
-  CudaTaskPolicyQueue & operator = ( CudaTaskPolicyQueue && ) = delete ;
-  CudaTaskPolicyQueue & operator = ( const CudaTaskPolicyQueue & ) = delete ;
-
-
-  ~CudaTaskPolicyQueue();
-
-  // Construct only on the Host
-  CudaTaskPolicyQueue
-    ( const unsigned arg_task_max_count
-    , const unsigned arg_task_max_size
-    , const unsigned arg_task_default_dependence_capacity
-    , const unsigned arg_task_team_size
-    );
-
-  struct Destroy {
-    CudaTaskPolicyQueue * m_policy ;
-    void destroy_shared_allocation();
-  };
-
-  //----------------------------------------
-  /** \brief  Allocate and construct a task.
-   *
-   *  Allocate space for DerivedTaskType followed
-   *  by TaskMember*[ dependence_capacity ]
-   */
-  KOKKOS_FUNCTION
-  task_root_type *
-  allocate_task( const unsigned arg_sizeof_task
-               , const unsigned arg_dep_capacity
-               , const unsigned arg_team_shmem = 0 );
-
-  KOKKOS_FUNCTION void deallocate_task( task_root_type * const );
-};
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-
-void wait( TaskPolicy< Kokkos::Cuda > & );
-
-template<>
-class TaskPolicy< Kokkos::Cuda >
-{
-public:
-
-  typedef Kokkos::Cuda                  execution_space ;
-  typedef TaskPolicy                    execution_policy ;
-  typedef Kokkos::Impl::CudaTeamMember  member_type ;
-
-private:
-
-  typedef Impl::TaskMember< Kokkos::Cuda , void , void >  task_root_type ;
-  typedef Kokkos::Experimental::MemoryPool< Kokkos::CudaUVMSpace > memory_space ;
-  typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
-
-  track_type                   m_track ;
-  Impl::CudaTaskPolicyQueue  * m_policy ;
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION static
-  const task_root_type * get_task_root( const FunctorType * f )
-    {
-      typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
-      return static_cast< const task_root_type * >( static_cast< const task_type * >(f) );
-    }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION static
-  task_root_type * get_task_root( FunctorType * f )
-    {
-      typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
-      return static_cast< task_root_type * >( static_cast< task_type * >(f) );
-    }
-
-public:
-
-  TaskPolicy
-    ( const unsigned arg_task_max_count
-    , const unsigned arg_task_max_size
-    , const unsigned arg_task_default_dependence_capacity = 4
-    , const unsigned arg_task_team_size = 0 /* choose default */
-    );
-
-  KOKKOS_FUNCTION TaskPolicy() = default ;
-  KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ;
-  KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ;
-  KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
-  KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ;
-
-  KOKKOS_FUNCTION
-  int allocated_task_count() const { return 0 ; }
-
-  //----------------------------------------
-  // Create serial-thread task
-  // Main process and tasks must use different functions
-  // to work around CUDA limitation where __host__ __device__
-  // functions are not allowed to invoke templated __global__ functions.
-
-  template< class FunctorType >
-  Future< typename FunctorType::value_type , execution_space >
-  proc_create( const FunctorType & arg_functor
-             , const unsigned      arg_dep_capacity = ~0u ) const
-    {
-      typedef typename FunctorType::value_type  value_type ;
-
-      typedef Impl::TaskMember< execution_space , value_type , FunctorType >
-        task_type ;
-
-      task_type * const task =
-        static_cast<task_type*>(
-          m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity ) );
-
-      if ( task ) {
-        // The root part of the class has been constructed.
-        // Must now construct the functor and result specific part.
-
-        task_type::copy_construct( task , arg_functor );
-
-        // Setting the apply pointer on the device requires code
-        // executing on the GPU.  This function is called on the
-        // host process so a kernel must be run.
-
-        // Launching a kernel will cause the allocated task in
-        // UVM memory to be copied to the GPU.
-        // Synchronize to guarantee non-concurrent access
-        // between host and device.
-
-        CUDA_SAFE_CALL( cudaDeviceSynchronize() );
-
-        Impl::cuda_set_apply_single<task_type,void><<<1,1>>>( task );
-
-        CUDA_SAFE_CALL( cudaGetLastError() );
-        CUDA_SAFE_CALL( cudaDeviceSynchronize() );
-      }
-
-      return Future< value_type , execution_space >( task );
-    }
-
-  template< class FunctorType >
-  __device__
-  Future< typename FunctorType::value_type , execution_space >
-  task_create( const FunctorType & arg_functor
-             , const unsigned      arg_dep_capacity = ~0u ) const
-    {
-      typedef typename FunctorType::value_type  value_type ;
-
-      typedef Impl::TaskMember< execution_space , value_type , FunctorType >
-        task_type ;
-
-      task_type * const task =
-        static_cast<task_type*>(
-          m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity ) );
-
-      if ( task ) {
-        // The root part of the class has been constructed.
-        // Must now construct the functor and result specific part.
-
-        task_type::copy_construct( task , arg_functor );
-
-        // Setting the apply pointer on the device requires code
-        // executing on the GPU.  If this function is called on the
-        // Host then a kernel must be run.
-
-        task->task_root_type::template set_apply_single< task_type , void >();
-      }
-
-      return Future< value_type , execution_space >( task );
-    }
-
-  //----------------------------------------
-  // Create thread-team task
-  // Main process and tasks must use different functions
-  // to work around CUDA limitation where __host__ __device__
-  // functions are not allowed to invoke templated __global__ functions.
-
-  template< class FunctorType >
-  Future< typename FunctorType::value_type , execution_space >
-  proc_create_team( const FunctorType & arg_functor
-                  , const unsigned      arg_dep_capacity = ~0u ) const
-    {
-      typedef typename FunctorType::value_type  value_type ;
-
-      typedef Impl::TaskMember< execution_space , value_type , FunctorType >
-        task_type ;
-
-      const unsigned team_shmem_size =
-        Kokkos::Impl::FunctorTeamShmemSize< FunctorType >::value
-           ( arg_functor , m_policy->m_team_size );
-
-      task_type * const task =
-        static_cast<task_type*>(
-          m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity , team_shmem_size ) );
-
-      if ( task ) {
-        // The root part of the class has been constructed.
-        // Must now construct the functor and result specific part.
-
-        task_type::copy_construct( task , arg_functor );
-
-        // Setting the apply pointer on the device requires code
-        // executing on the GPU.  This function is called on the
-        // host process so a kernel must be run.
-
-        // Launching a kernel will cause the allocated task in
-        // UVM memory to be copied to the GPU.
-        // Synchronize to guarantee non-concurrent access
-        // between host and device.
-
-        CUDA_SAFE_CALL( cudaDeviceSynchronize() );
-
-        Impl::cuda_set_apply_team<task_type,void><<<1,1>>>( task );
-
-        CUDA_SAFE_CALL( cudaGetLastError() );
-        CUDA_SAFE_CALL( cudaDeviceSynchronize() );
-      }
-
-      return Future< value_type , execution_space >( task );
-    }
-
-  template< class FunctorType >
-  __device__
-  Future< typename FunctorType::value_type , execution_space >
-  task_create_team( const FunctorType & arg_functor
-                  , const unsigned      arg_dep_capacity = ~0u ) const
-    {
-      typedef typename FunctorType::value_type  value_type ;
-
-      typedef Impl::TaskMember< execution_space , value_type , FunctorType >
-        task_type ;
-
-      const unsigned team_shmem_size =
-        Kokkos::Impl::FunctorTeamShmemSize< FunctorType >::value
-           ( arg_functor , m_policy->m_team_size );
-
-      task_type * const task =
-        static_cast<task_type*>(
-          m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity , team_shmem_size ) );
-
-      if ( task ) {
-        // The root part of the class has been constructed.
-        // Must now construct the functor and result specific part.
-
-        task_type::copy_construct( task , arg_functor );
-
-        // Setting the apply pointer on the device requires code
-        // executing on the GPU.  If this function is called on the
-        // Host then a kernel must be run.
-
-        task->task_root_type::template set_apply_team< task_type , void >();
-      }
-
-      return Future< value_type , execution_space >( task );
-    }
-
-  //----------------------------------------
-
-  Future< Latch , execution_space >
-  KOKKOS_INLINE_FUNCTION
-  create_latch( const int N ) const
-    {
-      task_root_type * const task =
-        m_policy->allocate_task( sizeof(task_root_type) , 0 , 0 );
-      task->m_dep_size = N ; // Using m_dep_size for latch counter
-      task->m_state = TASK_STATE_WAITING ;
-      return Future< Latch , execution_space >( task );
-    }
-
-  //----------------------------------------
-
-  template< class A1 , class A2 , class A3 , class A4 >
-  KOKKOS_INLINE_FUNCTION
-  void add_dependence( const Future<A1,A2> & after
-                     , const Future<A3,A4> & before
-                     , typename std::enable_if
-                        < std::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
-                          &&
-                          std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
-                        >::type * = 0
-                      ) const
-    { m_policy->add_dependence( after.m_task , before.m_task ); }
-
-  template< class FunctorType , class A3 , class A4 >
-  KOKKOS_INLINE_FUNCTION
-  void add_dependence( FunctorType * task_functor
-                     , const Future<A3,A4> & before
-                     , typename std::enable_if
-                        < std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
-                        >::type * = 0
-                      ) const
-    { m_policy->add_dependence( get_task_root(task_functor) , before.m_task ); }
-
-
-  template< class ValueType >
-  KOKKOS_INLINE_FUNCTION
-  const Future< ValueType , execution_space > &
-    spawn( const Future< ValueType , execution_space > & f 
-         , const bool priority = false ) const
-      {
-        if ( f.m_task ) {
-          f.m_task->m_queue =
-            ( f.m_task->m_team != 0
-            ? & ( m_policy->m_team[   priority ? 0 : 1 ] )
-            : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) );
-          m_policy->schedule_task( f.m_task );
-        }
-        return f ;
-      }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  void respawn( FunctorType * task_functor 
-              , const bool priority = false ) const
-    {
-      task_root_type * const t = get_task_root(task_functor);
-      t->m_queue =
-        ( t->m_team != 0 ? & ( m_policy->m_team[   priority ? 0 : 1 ] )
-                         : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) );
-      m_policy->reschedule_task( t );
-    }
-
-  // When a create method fails by returning a null Future
-  // the task that called the create method may respawn
-  // with a dependence on memory becoming available.
-  // This is a race as more than one task may be respawned
-  // with this need.
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  void respawn_needing_memory( FunctorType * task_functor ) const
-    {
-      task_root_type * const t = get_task_root(task_functor);
-      t->m_queue =
-        ( t->m_team != 0 ? & ( m_policy->m_team[   2 ] )
-                         : & ( m_policy->m_serial[ 2 ] ) );
-      m_policy->reschedule_task( t );
-    }
-
-  //----------------------------------------
-  // Functions for an executing task functor to query dependences,
-  // set new dependences, and respawn itself.
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< void , execution_space >
-  get_dependence( const FunctorType * task_functor , int i ) const
-    {
-      return Future<void,execution_space>(
-        get_task_root(task_functor)->get_dependence(i)
-      );
-    }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  int get_dependence( const FunctorType * task_functor ) const
-    { return get_task_root(task_functor)->get_dependence(); }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  void clear_dependence( FunctorType * task_functor ) const
-    { get_task_root(task_functor)->clear_dependence(); }
-
-  //----------------------------------------
-
-  __device__
-  static member_type member_single()
-    {
-      return
-        member_type( 0 /* shared memory pointer */
-                   , 0 /* shared memory begin offset */
-                   , 0 /* shared memory end offset */
-                   , 0 /* scratch level_1 pointer */
-                   , 0 /* scratch level_1 size */
-                   , 0 /* league rank */
-                   , 1 /* league size */ );
-    }
-
-  friend void wait( TaskPolicy< Kokkos::Cuda > & );
-};
-
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-
-//----------------------------------------------------------------------------
-
-#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
-#endif /* #ifndef KOKKOS_CUDA_TASKPOLICY_HPP */
-
-
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
index 92f6fc1f5f89a75fe717d351af5395da8bf894a4..b505b766a03cef464b4f880fdc788b6f46ead1c3 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
@@ -41,53 +41,266 @@
 //@HEADER
 */
 
-#ifndef KOKKOS_CUDA_VIEW_HPP
-#define KOKKOS_CUDA_VIEW_HPP
-
-#include <Kokkos_Macros.hpp>
+#ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
+#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
 
 /* only compile this file if CUDA is enabled for Kokkos */
-#ifdef KOKKOS_HAVE_CUDA
+#if defined( KOKKOS_HAVE_CUDA )
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
 
-#include <cstring>
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
 
-#include <Kokkos_HostSpace.hpp>
-#include <Kokkos_CudaSpace.hpp>
-#include <impl/Kokkos_Shape.hpp>
-#include <Kokkos_View.hpp>
+// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4)
+// Via reinterpret_cast this can be used to support all scalar types of those sizes.
+// Any other scalar type falls back to either normal reads out of global memory,
+// or using the __ldg intrinsic on GPUs that support it (Compute Capability >= 3.5)
+
+template< typename ValueType , typename AliasType >
+struct CudaTextureFetch {
+
+  ::cudaTextureObject_t   m_obj ;
+  const ValueType       * m_ptr ;
+  int                     m_offset ;
+
+  // Dereference operator pulls through texture object and returns by value
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator[]( const iType & i ) const
+    {
+#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
+      AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset );
+      return  *(reinterpret_cast<ValueType*> (&v));
+#else
+      return m_ptr[ i ];
+#endif
+    }
+
+  // Pointer to referenced memory
+  KOKKOS_INLINE_FUNCTION
+  operator const ValueType * () const { return m_ptr ; }
+
+
+  KOKKOS_INLINE_FUNCTION
+  CudaTextureFetch() : m_obj() , m_ptr() , m_offset() {}
+
+  KOKKOS_INLINE_FUNCTION
+  ~CudaTextureFetch() {}
+
+  KOKKOS_INLINE_FUNCTION
+  CudaTextureFetch( const CudaTextureFetch & rhs )
+    : m_obj(     rhs.m_obj )
+    , m_ptr(     rhs.m_ptr )
+    , m_offset(  rhs.m_offset )
+    {}
+
+  KOKKOS_INLINE_FUNCTION
+  CudaTextureFetch( CudaTextureFetch && rhs )
+    : m_obj(     rhs.m_obj )
+    , m_ptr(     rhs.m_ptr )
+    , m_offset(  rhs.m_offset )
+    {}
+
+  KOKKOS_INLINE_FUNCTION
+  CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
+    {
+      m_obj     = rhs.m_obj ;
+      m_ptr     = rhs.m_ptr ;
+      m_offset  = rhs.m_offset ;
+      return *this ;
+    }
+
+  KOKKOS_INLINE_FUNCTION
+  CudaTextureFetch & operator = ( CudaTextureFetch && rhs )
+    {
+      m_obj     = rhs.m_obj ;
+      m_ptr     = rhs.m_ptr ;
+      m_offset  = rhs.m_offset ;
+      return *this ;
+    }
+
+  // Texture object spans the entire allocation.
+  // This handle may view a subset of the allocation, so an offset is required.
+  template< class CudaMemorySpace >
+  inline explicit
+  CudaTextureFetch( const ValueType * const arg_ptr
+                  , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > & record
+                  )
+    : m_obj( record.template attach_texture_object< AliasType >() )
+    , m_ptr( arg_ptr )
+    , m_offset( record.attach_texture_object_offset( reinterpret_cast<const AliasType*>( arg_ptr ) ) )
+    {}
+
+  // Texture object spans the entire allocation.
+  // This handle may view a subset of the allocation, so an offset is required.
+  KOKKOS_INLINE_FUNCTION
+  CudaTextureFetch( const CudaTextureFetch & rhs , size_t offset )
+    : m_obj(     rhs.m_obj )
+    , m_ptr(     rhs.m_ptr + offset)
+    , m_offset( offset + rhs.m_offset )
+    {}
+};
+
+#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
+
+template< typename ValueType , typename AliasType >
+struct CudaLDGFetch {
+
+  const ValueType * m_ptr ;
+
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator[]( const iType & i ) const
+    {
+      #ifdef __CUDA_ARCH__
+      AliasType v = __ldg(reinterpret_cast<const AliasType*>(&m_ptr[i]));
+      return  *(reinterpret_cast<ValueType*> (&v));
+      #else
+      return m_ptr[i];
+      #endif
+    }
+
+  KOKKOS_INLINE_FUNCTION
+  operator const ValueType * () const { return m_ptr ; }
+
+  KOKKOS_INLINE_FUNCTION
+  CudaLDGFetch() : m_ptr() {}
+
+  KOKKOS_INLINE_FUNCTION
+  ~CudaLDGFetch() {}
+
+  KOKKOS_INLINE_FUNCTION
+  CudaLDGFetch( const CudaLDGFetch & rhs )
+    : m_ptr( rhs.m_ptr )
+    {}
+
+  KOKKOS_INLINE_FUNCTION
+  CudaLDGFetch( CudaLDGFetch && rhs )
+    : m_ptr( rhs.m_ptr )
+    {}
+
+  KOKKOS_INLINE_FUNCTION
+  CudaLDGFetch & operator = ( const CudaLDGFetch & rhs )
+    {
+      m_ptr = rhs.m_ptr ;
+      return *this ;
+    }
+
+  KOKKOS_INLINE_FUNCTION
+  CudaLDGFetch & operator = ( CudaLDGFetch && rhs )
+    {
+      m_ptr = rhs.m_ptr ;
+      return *this ;
+    }
+
+  template< class CudaMemorySpace >
+  inline explicit
+  CudaLDGFetch( const ValueType * const arg_ptr
+                  , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > const &
+                  )
+    : m_ptr( arg_ptr )
+    {}
+
+  KOKKOS_INLINE_FUNCTION
+  CudaLDGFetch( CudaLDGFetch const rhs ,size_t offset)
+    : m_ptr( rhs.m_ptr + offset )
+    {}
+
+};
+
+#endif
+
+} // namespace Impl
+} // namespace Experimental
+} // namespace Kokkos
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
+namespace Experimental {
 namespace Impl {
 
-template<>
-struct AssertShapeBoundsAbort< CudaSpace >
+/** \brief  Replace Default ViewDataHandle with Cuda texture fetch specialization
+ *          if 'const' value type, CudaSpace and random access.
+ */
+template< class Traits >
+class ViewDataHandle< Traits ,
+  typename std::enable_if<(
+    // Is Cuda memory space
+    ( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
+      std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )
+    &&
+    // Is a trivial const value of 4, 8, or 16 bytes
+    std::is_trivial<typename Traits::const_value_type>::value
+    &&
+    std::is_same<typename Traits::const_value_type,typename Traits::value_type>::value
+    &&
+    ( sizeof(typename Traits::const_value_type) ==  4 ||
+      sizeof(typename Traits::const_value_type) ==  8 ||
+      sizeof(typename Traits::const_value_type) == 16 )
+    &&
+    // Random access trait
+    ( Traits::memory_traits::RandomAccess != 0 )
+  )>::type >
 {
+public:
+
+  using track_type  = Kokkos::Experimental::Impl::SharedAllocationTracker ;
+
+  using value_type  = typename Traits::const_value_type ;
+  using return_type = typename Traits::const_value_type ; // NOT a reference
+
+  using alias_type = typename std::conditional< ( sizeof(value_type) ==  4 ) , int ,
+                     typename std::conditional< ( sizeof(value_type) ==  8 ) , ::int2 ,
+                     typename std::conditional< ( sizeof(value_type) == 16 ) , ::int4 , void
+                     >::type
+                     >::type
+                     >::type ;
+
+#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
+  using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
+#else
+  using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
+#endif
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type const & assign( handle_type const & arg_handle , track_type const & /* arg_tracker */ )
+    {
+      return arg_handle ;
+    }
+
   KOKKOS_INLINE_FUNCTION
-  static void apply( const size_t /* rank */ ,
-                     const size_t /* n0 */ , const size_t /* n1 */ ,
-                     const size_t /* n2 */ , const size_t /* n3 */ ,
-                     const size_t /* n4 */ , const size_t /* n5 */ ,
-                     const size_t /* n6 */ , const size_t /* n7 */ ,
-
-                     const size_t /* arg_rank */ ,
-                     const size_t /* i0 */ , const size_t /* i1 */ ,
-                     const size_t /* i2 */ , const size_t /* i3 */ ,
-                     const size_t /* i4 */ , const size_t /* i5 */ ,
-                     const size_t /* i6 */ , const size_t /* i7 */ )
+  static handle_type const assign( handle_type const & arg_handle , size_t offset )
     {
-      Kokkos::abort("Kokkos::View array bounds violation");
+      return handle_type(arg_handle,offset) ;
+    }
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker )
+    {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+      // Assignment of texture = non-texture requires creation of a texture object
+      // which can only occur on the host.  In addition, 'get_record' is only valid
+      // if called in a host execution space
+      return handle_type( arg_data_ptr , arg_tracker.template get_record< typename Traits::memory_space >() );
+#else
+      Kokkos::Impl::cuda_abort("Cannot create Cuda texture object from within a Cuda kernel");
+      return handle_type();
+#endif
     }
 };
 
+}
 }
 }
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-#endif // KOKKOS_HAVE_CUDA
+#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
 #endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp
index deb955ccd4755d43a24469171f2689d8c2a87dae..60903b757f921823189e47f2137bfeb714a09db2 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp
@@ -47,18 +47,10 @@
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 #include "Kokkos_Macros.hpp"
-#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
+#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
 
 #include <cuda.h>
 
-#if ! defined( CUDA_VERSION ) || ( CUDA_VERSION < 4010 )
-#error "Cuda version 4.1 or greater required"
-#endif
-
-#if ( __CUDA_ARCH__ < 200 )
-#error "Cuda device capability 2.0 or greater required"
-#endif
-
 extern "C" {
 /*  Cuda runtime function, declared in <crt/device_runtime.h>
  *  Requires capability 2.x or better.
@@ -90,30 +82,6 @@ void cuda_abort( const char * const message )
 
 } // namespace Impl
 } // namespace Kokkos
-
-#else
-
-namespace Kokkos {
-namespace Impl {
-KOKKOS_INLINE_FUNCTION
-void cuda_abort( const char * const ) {}
-}
-}
-
-#endif /* #if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
-namespace Kokkos {
-__device__ inline
-void abort( const char * const message ) { Kokkos::Impl::cuda_abort(message); }
-}
-#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
+#endif /* #if defined(__CUDACC__) && defined( KOKKOS_HAVE_CUDA ) */
 #endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */
 
diff --git a/lib/kokkos/core/src/Kokkos_Atomic.hpp b/lib/kokkos/core/src/Kokkos_Atomic.hpp
index 6d37d69a63c8c837457fb2edba6a6d607103b6ad..3102402b8383beb8ec92bee80af6e4ab7d231dd5 100644
--- a/lib/kokkos/core/src/Kokkos_Atomic.hpp
+++ b/lib/kokkos/core/src/Kokkos_Atomic.hpp
@@ -75,15 +75,16 @@
 #if defined(_WIN32)
 #define KOKKOS_ATOMICS_USE_WINDOWS
 #else
-#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
+#if defined( KOKKOS_HAVE_CUDA )
 
 // Compiling NVIDIA device code, must use Cuda atomics:
 
 #define KOKKOS_ATOMICS_USE_CUDA
+#endif
 
-#elif ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
-      ! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
-      ! defined( KOKKOS_ATOMICS_USE_OMP31 )
+#if ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
+    ! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
+    ! defined( KOKKOS_ATOMICS_USE_OMP31 )
 
 // Compiling for non-Cuda atomic implementation has not been pre-selected.
 // Choose the best implementation for the detected compiler.
@@ -91,7 +92,7 @@
 
 #if defined( KOKKOS_COMPILER_GNU ) || \
     defined( KOKKOS_COMPILER_CLANG ) || \
-    ( defined ( KOKKOS_COMPILER_NVCC ) && defined ( __GNUC__ ) )
+    ( defined ( KOKKOS_COMPILER_NVCC ) )
 
 #define KOKKOS_ATOMICS_USE_GCC
 
@@ -126,6 +127,9 @@ namespace Impl {
 /// This function tries to aquire the lock for the hash value derived
 /// from the provided ptr. If the lock is successfully aquired the
 /// function returns true. Otherwise it returns false.
+#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
+extern
+#endif
 __device__ inline
 bool lock_address_cuda_space(void* ptr);
 
@@ -135,6 +139,9 @@ bool lock_address_cuda_space(void* ptr);
 /// from the provided ptr. This function should only be called
 /// after previously successfully aquiring a lock with
 /// lock_address.
+#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
+extern
+#endif
 __device__ inline
 void unlock_address_cuda_space(void* ptr);
 }
@@ -287,7 +294,7 @@ const char * atomic_query_version()
 //----------------------------------------------------------------------------
 // This atomic-style macro should be an inlined function, not a macro
 
-#if defined( KOKKOS_COMPILER_GNU ) && !defined(__PGIC__)
+#if defined( KOKKOS_COMPILER_GNU ) && !defined(__PGIC__) && !defined(__CUDA_ARCH__)
 
   #define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) __builtin_prefetch(addr,0,0)
   #define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) __builtin_prefetch(addr,1,0)
diff --git a/lib/kokkos/core/src/Kokkos_Concepts.hpp b/lib/kokkos/core/src/Kokkos_Concepts.hpp
index 82a342eec0bfba9e5420b86d41c586b22969712c..af83e5cac66069c94bc58b8cc22abc968f14ee59 100644
--- a/lib/kokkos/core/src/Kokkos_Concepts.hpp
+++ b/lib/kokkos/core/src/Kokkos_Concepts.hpp
@@ -46,7 +46,14 @@
 
 #include <type_traits>
 
+// Needed for 'is_space<S>::host_mirror_space'
+#include <Kokkos_Core_fwd.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
 namespace Kokkos {
+
 //Schedules for Execution Policies
 struct Static {};
 struct Dynamic {};
@@ -59,7 +66,7 @@ struct Schedule
                || std::is_same<T,Dynamic>::value
                , "Kokkos: Invalid Schedule<> type."
                );
-  using schedule_type = Schedule<T>;
+  using schedule_type = Schedule ;
   using type = T;
 };
 
@@ -68,11 +75,268 @@ template<typename T>
 struct IndexType
 {
   static_assert(std::is_integral<T>::value,"Kokkos: Invalid IndexType<>.");
-  using index_type = IndexType<T>;
+  using index_type = IndexType ;
   using type = T;
 };
 
 } // namespace Kokkos
 
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+#define KOKKOS_IMPL_IS_CONCEPT( CONCEPT ) \
+  template< typename T > struct is_ ## CONCEPT { \
+  private: \
+    template< typename , typename = std::true_type > struct have : std::false_type {}; \
+    template< typename U > struct have<U,typename std::is_same<U,typename U:: CONCEPT >::type> : std::true_type {}; \
+  public: \
+    enum { value = is_ ## CONCEPT::template have<T>::value }; \
+  };
+
+// Public concept:
+
+KOKKOS_IMPL_IS_CONCEPT( memory_space )
+KOKKOS_IMPL_IS_CONCEPT( memory_traits )
+KOKKOS_IMPL_IS_CONCEPT( execution_space )
+KOKKOS_IMPL_IS_CONCEPT( execution_policy )
+KOKKOS_IMPL_IS_CONCEPT( array_layout )
+
+namespace Impl {
+
+// For backward compatibility:
+
+using Kokkos::is_memory_space ;
+using Kokkos::is_memory_traits ;
+using Kokkos::is_execution_space ;
+using Kokkos::is_execution_policy ;
+using Kokkos::is_array_layout ;
+
+// Implementation concept:
+
+KOKKOS_IMPL_IS_CONCEPT( iteration_pattern )
+KOKKOS_IMPL_IS_CONCEPT( schedule_type )
+KOKKOS_IMPL_IS_CONCEPT( index_type )
+
+}
+
+#undef KOKKOS_IMPL_IS_CONCEPT
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+template< class ExecutionSpace , class MemorySpace >
+struct Device {
+  static_assert( Kokkos::is_execution_space<ExecutionSpace>::value
+               , "Execution space is not valid" );
+  static_assert( Kokkos::is_memory_space<MemorySpace>::value
+               , "Memory space is not valid" );
+  typedef ExecutionSpace                        execution_space;
+  typedef MemorySpace                           memory_space;
+  typedef Device<execution_space,memory_space>  device_type;
+};
+
+
+template< typename T >
+struct is_space {
+private:
+
+  template< typename , typename = void >
+  struct exe : std::false_type { typedef void space ; };
+
+  template< typename , typename = void >
+  struct mem : std::false_type { typedef void space ; };
+
+  template< typename , typename = void >
+  struct dev : std::false_type { typedef void space ; };
+
+  template< typename U >
+  struct exe<U,typename std::conditional<true,void,typename U::execution_space>::type>
+    : std::is_same<U,typename U::execution_space>::type
+    { typedef typename U::execution_space space ; };
+
+  template< typename U >
+  struct mem<U,typename std::conditional<true,void,typename U::memory_space>::type>
+    : std::is_same<U,typename U::memory_space>::type
+    { typedef typename U::memory_space space ; };
+
+  template< typename U >
+  struct dev<U,typename std::conditional<true,void,typename U::device_type>::type>
+    : std::is_same<U,typename U::device_type>::type
+    { typedef typename U::device_type space ; };
+
+  typedef typename is_space::template exe<T> is_exe ;
+  typedef typename is_space::template mem<T> is_mem ;
+  typedef typename is_space::template dev<T> is_dev ;
+
+public:
+
+  enum { value = is_exe::value || is_mem::value || is_dev::value };
+
+  typedef typename is_exe::space execution_space ;
+  typedef typename is_mem::space memory_space ;
+
+  // For backward compatibility, deprecated in favor of
+  // Kokkos::Impl::HostMirror<S>::host_mirror_space
+
+  typedef typename std::conditional
+    < std::is_same< memory_space , Kokkos::HostSpace >::value
+#if defined( KOKKOS_HAVE_CUDA )
+      || std::is_same< memory_space , Kokkos::CudaUVMSpace >::value
+      || std::is_same< memory_space , Kokkos::CudaHostPinnedSpace >::value
+#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
+    , memory_space
+    , Kokkos::HostSpace
+    >::type  host_memory_space ;
+
+#if defined( KOKKOS_HAVE_CUDA )
+  typedef typename std::conditional
+    < std::is_same< execution_space , Kokkos::Cuda >::value
+    , Kokkos::DefaultHostExecutionSpace , execution_space
+    >::type  host_execution_space ;
+#else
+  typedef execution_space  host_execution_space ;
+#endif
+
+  typedef typename std::conditional
+    < std::is_same< execution_space , host_execution_space >::value &&
+      std::is_same< memory_space ,    host_memory_space    >::value
+    , T , Kokkos::Device< host_execution_space , host_memory_space >
+    >::type  host_mirror_space ;
+};
+
+// For backward compatibility
+
+namespace Impl {
+
+using Kokkos::is_space ;
+
+}
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+/**\brief  Access relationship between DstMemorySpace and SrcMemorySpace
+ *
+ *  The default case can assume accessibility for the same space.
+ *  Specializations must be defined for different memory spaces.
+ */
+template< typename DstMemorySpace , typename SrcMemorySpace >
+struct MemorySpaceAccess {
+
+  static_assert( Kokkos::is_memory_space< DstMemorySpace >::value &&
+                 Kokkos::is_memory_space< SrcMemorySpace >::value
+               , "template arguments must be memory spaces" );
+
+  /**\brief  Can a View (or pointer) to memory in SrcMemorySpace
+   *         be assigned to a View (or pointer) to memory marked DstMemorySpace.
+   *
+   *  1. DstMemorySpace::execution_space == SrcMemorySpace::execution_space
+   *  2. All execution spaces that can access DstMemorySpace can also access
+   *     SrcMemorySpace.
+   */
+  enum { assignable = std::is_same<DstMemorySpace,SrcMemorySpace>::value };
+
+  /**\brief  For all DstExecSpace::memory_space == DstMemorySpace
+   *         DstExecSpace can access SrcMemorySpace.
+   */
+  enum { accessible = assignable };
+
+  /**\brief  Does a DeepCopy capability exist
+   *         to DstMemorySpace from SrcMemorySpace
+   */
+  enum { deepcopy = assignable };
+};
+
+
+/**\brief  Can AccessSpace access MemorySpace ?
+ *
+ *   Requires:
+ *     Kokkos::is_space< AccessSpace >::value
+ *     Kokkos::is_memory_space< MemorySpace >::value
+ *
+ *   Can AccessSpace::execution_space access MemorySpace ?
+ *     enum : bool { accessible };
+ *
+ *   Is View<AccessSpace::memory_space> assignable from View<MemorySpace> ?
+ *     enum : bool { assignable };
+ *
+ *   If ! accessible then which intercessory memory space
+ *   should be used to deep copy memory for
+ *     AccessSpace::execution_space
+ *   to get access.
+ *   When AccessSpace::memory_space == Kokkos::HostSpace
+ *   then space is the View host mirror space.
+ */
+template< typename AccessSpace , typename MemorySpace >
+struct SpaceAccessibility {
+private:
+
+  static_assert( Kokkos::is_space< AccessSpace >::value
+               , "template argument #1 must be a Kokkos space" );
+
+  static_assert( Kokkos::is_memory_space< MemorySpace >::value
+               , "template argument #2 must be a Kokkos memory space" );
+
+  // The input AccessSpace may be a Device<ExecSpace,MemSpace>
+  // verify that it is a valid combination of spaces.
+  static_assert( Kokkos::Impl::MemorySpaceAccess
+                   < typename AccessSpace::execution_space::memory_space
+                   , typename AccessSpace::memory_space
+                   >::accessible
+               , "template argument #1 is an invalid space" );
+
+  typedef Kokkos::Impl::MemorySpaceAccess
+    < typename AccessSpace::execution_space::memory_space , MemorySpace >
+      exe_access ;
+
+  typedef Kokkos::Impl::MemorySpaceAccess
+    < typename AccessSpace::memory_space , MemorySpace >
+      mem_access ;
+
+public:
+
+  /**\brief  Can AccessSpace::execution_space access MemorySpace ?
+   *
+   *  Default based upon memory space accessibility.
+   *  Specialization required for other relationships.
+   */
+  enum { accessible = exe_access::accessible };
+
+  /**\brief  Can assign to AccessSpace from MemorySpace ?
+   *
+   *  Default based upon memory space accessibility.
+   *  Specialization required for other relationships.
+   */
+  enum { assignable =
+    is_memory_space< AccessSpace >::value && mem_access::assignable };
+
+  /**\brief  Can deep copy to AccessSpace::memory_space from MemorySpace ?  */
+  enum { deepcopy = mem_access::deepcopy };
+
+  // What intercessory space for AccessSpace::execution_space
+  // to be able to access MemorySpace?
+  // If same memory space or not accessible use the AccessSpace
+  // else construct a device with execution space and memory space.
+  typedef typename std::conditional
+    < std::is_same<typename AccessSpace::memory_space,MemorySpace>::value ||
+      ! exe_access::accessible
+    , AccessSpace
+    , Kokkos::Device< typename AccessSpace::execution_space , MemorySpace >
+    >::type  space ;
+};
+
+}} // namespace Kokkos::Impl
+
+//----------------------------------------------------------------------------
+
 #endif // KOKKOS_CORE_CONCEPTS_HPP
 
diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp
index 7cde4610ee8957c2eea7a9a2e05c8f2cbb9463f4..266f750d3753321bb142a2c80fe8d65f8a034f90 100644
--- a/lib/kokkos/core/src/Kokkos_Core.hpp
+++ b/lib/kokkos/core/src/Kokkos_Core.hpp
@@ -72,6 +72,7 @@
 #include <Kokkos_Vectorization.hpp>
 #include <Kokkos_Atomic.hpp>
 #include <Kokkos_hwloc.hpp>
+#include <Kokkos_Timer.hpp>
 
 #ifdef KOKKOS_HAVE_CXX11
 #include <Kokkos_Complex.hpp>
@@ -112,7 +113,6 @@ void fence();
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 
 /* Allocate memory from a memory space.
  * The allocation is tracked in Kokkos memory tracking system, so
@@ -155,18 +155,8 @@ void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size )
     reallocate_tracked( arg_alloc , arg_alloc_size );
 }
 
-} // namespace Experimental
 } // namespace Kokkos
 
-
-namespace Kokkos {
-
-using Kokkos::Experimental::kokkos_malloc ;
-using Kokkos::Experimental::kokkos_realloc ;
-using Kokkos::Experimental::kokkos_free ;
-
-}
-
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
index e9648b59b8f62c5cb4ea46c00ec1498c361cbdb4..0f5ef9200a9b14ac1cec7361449c5f123cc24f48 100644
--- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
+++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
@@ -49,6 +49,7 @@
 // and compiler environment then sets a collection of #define macros.
 
 #include <Kokkos_Macros.hpp>
+#include <impl/Kokkos_Utilities.hpp>
 
 //----------------------------------------------------------------------------
 // Have assumed a 64bit build (8byte pointers) throughout the code base.
@@ -207,7 +208,7 @@ namespace Impl {
 
 template< class Functor
         , class Policy
-        , class EnableFunctor = void 
+        , class EnableFunctor = void
 	      , class EnablePolicy = void
         >
 struct FunctorPolicyExecutionSpace;
@@ -220,7 +221,7 @@ struct FunctorPolicyExecutionSpace;
 /// This is an implementation detail of parallel_for.  Users should
 /// skip this and go directly to the nonmember function parallel_for.
 template< class FunctorType , class ExecPolicy , class ExecutionSpace =
-          typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space 
+          typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
         > class ParallelFor ;
 
 /// \class ParallelReduce
@@ -229,7 +230,7 @@ template< class FunctorType , class ExecPolicy , class ExecutionSpace =
 /// This is an implementation detail of parallel_reduce.  Users should
 /// skip this and go directly to the nonmember function parallel_reduce.
 template< class FunctorType , class ExecPolicy , class ReducerType = InvalidType, class ExecutionSpace =
-          typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space 
+          typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
         > class ParallelReduce ;
 
 /// \class ParallelScan
@@ -238,8 +239,8 @@ template< class FunctorType , class ExecPolicy , class ReducerType = InvalidType
 /// This is an implementation detail of parallel_scan.  Users should
 /// skip this and go directly to the documentation of the nonmember
 /// template function Kokkos::parallel_scan.
-template< class FunctorType , class ExecPolicy , class ExecutionSapce = 
-          typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space 
+template< class FunctorType , class ExecPolicy , class ExecutionSapce =
+          typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
         > class ParallelScan ;
 
 }}
diff --git a/lib/kokkos/core/src/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Kokkos_Cuda.hpp
index 3130ee3198f35ec59dbeef7755cfffc11fda9346..84ae5ee044c4bd62c459656fccb3cb95ca7328eb 100644
--- a/lib/kokkos/core/src/Kokkos_Cuda.hpp
+++ b/lib/kokkos/core/src/Kokkos_Cuda.hpp
@@ -56,7 +56,7 @@
 #include <Kokkos_CudaSpace.hpp>
 
 #include <Kokkos_Parallel.hpp>
-#include <Kokkos_TaskPolicy.hpp>
+#include <Kokkos_TaskScheduler.hpp>
 #include <Kokkos_Layout.hpp>
 #include <Kokkos_ScratchSpace.hpp>
 #include <Kokkos_MemoryTraits.hpp>
@@ -229,6 +229,39 @@ private:
 namespace Kokkos {
 namespace Impl {
 
+template<>
+struct MemorySpaceAccess
+  < Kokkos::CudaSpace
+  , Kokkos::Cuda::scratch_memory_space
+  >
+{
+  enum { assignable = false };
+  enum { accessible = true };
+  enum { deepcopy   = false };
+};
+
+#if defined( KOKKOS_USE_CUDA_UVM )
+
+// If forcing use of UVM everywhere
+// then must assume that CudaUVMSpace
+// can be a stand-in for CudaSpace.
+// This will fail for a strange host-side execution space
+// that defines CudaUVMSpace as its preferred memory space.
+
+template<>
+struct MemorySpaceAccess
+  < Kokkos::CudaUVMSpace
+  , Kokkos::Cuda::scratch_memory_space
+  >
+{
+  enum { assignable = false };
+  enum { accessible = true };
+  enum { deepcopy   = false };
+};
+
+#endif
+
+
 template<>
 struct VerifyExecutionCanAccessMemorySpace
   < Kokkos::CudaSpace
@@ -259,9 +292,6 @@ struct VerifyExecutionCanAccessMemorySpace
 
 #include <Cuda/Kokkos_CudaExec.hpp>
 #include <Cuda/Kokkos_Cuda_View.hpp>
-
-#include <Cuda/KokkosExp_Cuda_View.hpp>
-
 #include <Cuda/Kokkos_Cuda_Parallel.hpp>
 #include <Cuda/Kokkos_Cuda_Task.hpp>
 
diff --git a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp
index cd728895d0f02419d702ccb37ec9b048b08a6df8..fd9b0ad123004c1a125b6cebb93af8052e68e719 100644
--- a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp
+++ b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp
@@ -88,6 +88,9 @@ public:
   void deallocate( void * const arg_alloc_ptr
                  , const size_t arg_alloc_size ) const ;
 
+  /**\brief Return Name of the MemorySpace */
+  static constexpr const char* name();
+
   /*--------------------------------*/
   /** \brief  Error reporting for HostSpace attempt to access CudaSpace */
   static void access_error();
@@ -97,7 +100,8 @@ private:
 
   int  m_device ; ///< Which Cuda device
 
-  // friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
+  static constexpr const char* m_name = "Cuda";
+  friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
 };
 
 namespace Impl {
@@ -156,6 +160,14 @@ public:
   /** \brief  If UVM capability is available */
   static bool available();
 
+
+  /*--------------------------------*/
+  /** \brief  CudaUVMSpace specific routine */
+  static int number_of_allocations();
+
+  /*--------------------------------*/
+
+
   /*--------------------------------*/
 
   CudaUVMSpace();
@@ -172,11 +184,16 @@ public:
   void deallocate( void * const arg_alloc_ptr
                  , const size_t arg_alloc_size ) const ;
 
+  /**\brief Return Name of the MemorySpace */
+  static constexpr const char* name();
+
   /*--------------------------------*/
 
 private:
-
   int  m_device ; ///< Which Cuda device
+
+  static constexpr const char* m_name = "CudaUVM";
+
 };
 
 } // namespace Kokkos
@@ -215,6 +232,13 @@ public:
   void deallocate( void * const arg_alloc_ptr
                  , const size_t arg_alloc_size ) const ;
 
+  /**\brief Return Name of the MemorySpace */
+  static constexpr const char* name();
+
+private:
+
+  static constexpr const char* m_name = "CudaHostPinned";
+
   /*--------------------------------*/
 };
 
@@ -226,6 +250,126 @@ public:
 namespace Kokkos {
 namespace Impl {
 
+static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaSpace >::assignable , "" );
+static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaUVMSpace >::assignable , "" );
+static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" );
+
+//----------------------------------------
+
+template<>
+struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace > {
+  enum { assignable = false };
+  enum { accessible = false };
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace > {
+  // HostSpace::execution_space != CudaUVMSpace::execution_space
+  enum { assignable = false };
+  enum { accessible = true };
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace > {
+  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
+  enum { assignable = true };
+  enum { accessible = true };
+  enum { deepcopy   = true };
+};
+
+//----------------------------------------
+
+template<>
+struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace > {
+  enum { assignable = false };
+  enum { accessible = false };
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace > {
+  // CudaSpace::execution_space == CudaUVMSpace::execution_space
+  enum { assignable = true };
+  enum { accessible = true };
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace > {
+  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
+  enum { assignable = false };
+  enum { accessible = true }; // CudaSpace::execution_space
+  enum { deepcopy   = true };
+};
+
+//----------------------------------------
+// CudaUVMSpace::execution_space == Cuda
+// CudaUVMSpace accessible to both Cuda and Host
+
+template<>
+struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace > {
+  enum { assignable = false };
+  enum { accessible = false }; // Cuda cannot access HostSpace
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace > {
+  // CudaUVMSpace::execution_space == CudaSpace::execution_space
+  // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
+  enum { assignable = false };
+
+  // CudaUVMSpace::execution_space can access CudaSpace
+  enum { accessible = true };
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace > {
+  // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
+  enum { assignable = false };
+  enum { accessible = true }; // CudaUVMSpace::execution_space
+  enum { deepcopy   = true };
+};
+
+
+//----------------------------------------
+// CudaHostPinnedSpace::execution_space == HostSpace::execution_space
+// CudaHostPinnedSpace accessible to both Cuda and Host
+
+template<>
+struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace > {
+  enum { assignable = false }; // Cannot access from Cuda
+  enum { accessible = true };  // CudaHostPinnedSpace::execution_space
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace > {
+  enum { assignable = false }; // Cannot access from Host
+  enum { accessible = false };
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace > {
+  enum { assignable = false }; // different execution_space
+  enum { accessible = true };  // same accessibility
+  enum { deepcopy   = true };
+};
+
+//----------------------------------------
+
+}} // namespace Kokkos::Impl
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+
+namespace Kokkos {
+namespace Impl {
+
 void DeepCopyAsyncCuda( void * dst , const void * src , size_t n);
 
 template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda>
@@ -553,7 +697,6 @@ struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHost
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 template<>
@@ -791,7 +934,6 @@ public:
 };
 
 } // namespace Impl
-} // namespace Experimental
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp
index 5834fc04dbe43c78bd53b032db1e97ade5e34655..db4d67ae7d9656a998c1d3ff867dc6c1601562b7 100644
--- a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp
+++ b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp
@@ -52,6 +52,7 @@
 #include <impl/Kokkos_AnalyzePolicy.hpp>
 #include <Kokkos_Concepts.hpp>
 #include <iostream>
+
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
@@ -82,7 +83,6 @@ class RangePolicy
   : public Impl::PolicyTraits<Properties ... >
 {
 private:
-
   typedef Impl::PolicyTraits<Properties ... > traits;
 
   typename traits::execution_space m_space ;
@@ -90,8 +90,8 @@ private:
   typename traits::index_type  m_end ;
   typename traits::index_type  m_granularity ;
   typename traits::index_type  m_granularity_mask ;
-public:
 
+public:
   //! Tag this class as an execution policy
   typedef RangePolicy execution_policy;
   typedef typename traits::index_type member_type ;
@@ -100,7 +100,6 @@ public:
   KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; }
   KOKKOS_INLINE_FUNCTION member_type end()   const { return m_end ; }
 
-
   //TODO: find a better workaround for Clangs weird instantiation order
   // This thing is here because of an instantiation error, where the RangePolicy is inserted into FunctorValue Traits, which
   // tries decltype on the operator. It tries to do this even though the first argument of parallel for clearly doesn't match.
@@ -135,47 +134,45 @@ public:
                  , work_begin , work_end )
     {}
 
-  public:
-
-     /** \brief return chunk_size */
-     inline member_type chunk_size() const {
-       return m_granularity;
-     }
+public:
+  /** \brief return chunk_size */
+  inline member_type chunk_size() const {
+    return m_granularity;
+  }
+
+  /** \brief set chunk_size to a discrete value*/
+  inline RangePolicy set_chunk_size(int chunk_size_) const {
+    RangePolicy p = *this;
+    p.m_granularity = chunk_size_;
+    p.m_granularity_mask = p.m_granularity - 1;
+    return p;
+  }
 
-     /** \brief set chunk_size to a discrete value*/
-     inline RangePolicy set_chunk_size(int chunk_size_) const {
-       RangePolicy p = *this;
-       p.m_granularity = chunk_size_;
-       p.m_granularity_mask = p.m_granularity - 1;
-       return p;
-     }
+private:
+  /** \brief finalize chunk_size if it was set to AUTO*/
+  inline void set_auto_chunk_size() {
+
+   typename traits::index_type concurrency = traits::execution_space::concurrency();
+   if( concurrency==0 ) concurrency=1;
+
+   if(m_granularity > 0) {
+     if(!Impl::is_integral_power_of_two( m_granularity ))
+       Kokkos::abort("RangePolicy blocking granularity must be power of two" );
+   }
+
+   member_type new_chunk_size = 1;
+   while(new_chunk_size*100*concurrency < m_end-m_begin)
+     new_chunk_size *= 2;
+   if(new_chunk_size < 128) {
+     new_chunk_size = 1;
+     while( (new_chunk_size*40*concurrency < m_end-m_begin ) && (new_chunk_size<128) )
+       new_chunk_size*=2;
+   }
+   m_granularity = new_chunk_size;
+   m_granularity_mask = m_granularity - 1;
+  }
 
-  private:
-     /** \brief finalize chunk_size if it was set to AUTO*/
-     inline void set_auto_chunk_size() {
-
-       typename traits::index_type concurrency = traits::execution_space::concurrency();
-       if( concurrency==0 ) concurrency=1;
-
-       if(m_granularity > 0) {
-         if(!Impl::is_integral_power_of_two( m_granularity ))
-           Kokkos::abort("RangePolicy blocking granularity must be power of two" );
-       }
-
-
-       member_type new_chunk_size = 1;
-       while(new_chunk_size*100*concurrency < m_end-m_begin)
-         new_chunk_size *= 2;
-       if(new_chunk_size < 128) {
-         new_chunk_size = 1;
-         while( (new_chunk_size*40*concurrency < m_end-m_begin ) && (new_chunk_size<128) )
-           new_chunk_size*=2;
-       }
-       m_granularity = new_chunk_size;
-       m_granularity_mask = m_granularity - 1;
-     }
-
-  public:
+public:
   /** \brief  Subrange for a partition's rank and size.
    *
    *  Typically used to partition a range over a group of threads.
@@ -212,16 +209,15 @@ public:
           if ( range.end() < m_end )   m_end   = range.end() ;
         }
       }
-  private:
-     member_type m_begin ;
-     member_type m_end ;
-     WorkRange();
-     WorkRange & operator = ( const WorkRange & );
 
+  private:
+    member_type m_begin ;
+    member_type m_end ;
+    WorkRange();
+    WorkRange & operator = ( const WorkRange & );
   };
 };
 
-
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
@@ -231,7 +227,6 @@ namespace Kokkos {
 
 namespace Impl {
 
-
 template< class ExecSpace, class ... Properties>
 class TeamPolicyInternal: public Impl::PolicyTraits<Properties ... > {
 private:
@@ -245,6 +240,10 @@ public:
    *  This size takes into account execution space concurrency limitations and
    *  scratch memory space limitations for reductions, team reduce/scan, and
    *  team shared memory.
+   *
+   *  This function only works for single-operator functors.
+   *  With multi-operator functors it cannot be determined
+   *  which operator will be called.
    */
   template< class FunctorType >
   static int team_size_max( const FunctorType & );
@@ -254,6 +253,10 @@ public:
    *  This size takes into account execution space concurrency limitations and
    *  scratch memory space limitations for reductions, team reduce/scan, and
    *  team shared memory.
+   *
+   *  This function only works for single-operator functors.
+   *  With multi-operator functors it cannot be determined
+   *  which operator will be called.
    */
   template< class FunctorType >
   static int team_size_recommended( const FunctorType & );
@@ -344,9 +347,7 @@ public:
     KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const ;
   };
 };
-}
 
-namespace Impl {
   struct PerTeamValue {
     int value;
     PerTeamValue(int arg);
@@ -356,12 +357,12 @@ namespace Impl {
     int value;
     PerThreadValue(int arg);
   };
+
 }
 
 Impl::PerTeamValue PerTeam(const int& arg);
 Impl::PerThreadValue PerThread(const int& arg);
 
-
 /** \brief  Execution policy for parallel work over a league of teams of threads.
  *
  *  The work functor is called for each thread of each team such that
@@ -443,10 +444,6 @@ public:
 
 };
 
-} // namespace Kokkos
-
-namespace Kokkos {
-
 namespace Impl {
 
 template<typename iType, class TeamMemberType>
@@ -484,8 +481,8 @@ public:
 
   KOKKOS_INLINE_FUNCTION
   TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
-                                , const iType& arg_end
-                                )
+                                 , const iType& arg_end
+                                 )
     : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
     , end(   iend(   0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
     , thread( arg_thread )
@@ -502,32 +499,33 @@ public:
     {}
 };
 
-  template<typename iType, class TeamMemberType>
-  struct ThreadVectorRangeBoundariesStruct {
-    typedef iType index_type;
-    enum {start = 0};
-    const iType end;
-    enum {increment = 1};
+template<typename iType, class TeamMemberType>
+struct ThreadVectorRangeBoundariesStruct {
+  typedef iType index_type;
+  enum {start = 0};
+  const iType end;
+  enum {increment = 1};
 
-    KOKKOS_INLINE_FUNCTION
-    ThreadVectorRangeBoundariesStruct (const TeamMemberType& thread, const iType& count):
-      end( count )
-    {}
-  };
+  KOKKOS_INLINE_FUNCTION
+  ThreadVectorRangeBoundariesStruct ( const TeamMemberType, const iType& count ) : end( count ) {}
+  KOKKOS_INLINE_FUNCTION
+  ThreadVectorRangeBoundariesStruct ( const iType& count ) : end( count ) {}
+};
 
-  template<class TeamMemberType>
-  struct ThreadSingleStruct {
-    const TeamMemberType& team_member;
-    KOKKOS_INLINE_FUNCTION
-    ThreadSingleStruct(const TeamMemberType& team_member_):team_member(team_member_){}
-  };
+template<class TeamMemberType>
+struct ThreadSingleStruct {
+  const TeamMemberType& team_member;
+  KOKKOS_INLINE_FUNCTION
+  ThreadSingleStruct( const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
+};
+
+template<class TeamMemberType>
+struct VectorSingleStruct {
+  const TeamMemberType& team_member;
+  KOKKOS_INLINE_FUNCTION
+  VectorSingleStruct( const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
+};
 
-  template<class TeamMemberType>
-  struct VectorSingleStruct {
-    const TeamMemberType& team_member;
-    KOKKOS_INLINE_FUNCTION
-    VectorSingleStruct(const TeamMemberType& team_member_):team_member(team_member_){}
-  };
 } // namespace Impl
 
 /** \brief  Execution policy for parallel work over a threads within a team.
@@ -538,7 +536,8 @@ public:
  */
 template<typename iType, class TeamMemberType>
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& count);
+Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType>
+TeamThreadRange( const TeamMemberType&, const iType& count );
 
 /** \brief  Execution policy for parallel work over a threads within a team.
  *
@@ -546,9 +545,10 @@ Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(cons
  *  This policy is used together with a parallel pattern as a nested layer within a kernel launched
  *  with the TeamPolicy. This variant expects a begin and end. So the range is (begin,end].
  */
-template<typename iType, class TeamMemberType>
+template<typename iType1, typename iType2, class TeamMemberType>
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& begin, const iType& end);
+Impl::TeamThreadRangeBoundariesStruct<typename std::common_type<iType1, iType2>::type, TeamMemberType>
+TeamThreadRange( const TeamMemberType&, const iType1& begin, const iType2& end );
 
 /** \brief  Execution policy for a vector parallel loop.
  *
@@ -558,13 +558,12 @@ Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(cons
  */
 template<typename iType, class TeamMemberType>
 KOKKOS_INLINE_FUNCTION
-Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType> ThreadVectorRange(const TeamMemberType&, const iType& count);
+Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType>
+ThreadVectorRange( const TeamMemberType&, const iType& count );
 
 } // namespace Kokkos
 
-
 #endif /* #define KOKKOS_EXECPOLICY_HPP */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
-
diff --git a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp
index e02689b0f96f370448061cb90bd80a3492d32c35..10e735fe00151d302600f69fce3de798025621af 100644
--- a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp
+++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp
@@ -46,7 +46,6 @@
 
 
 #include <Kokkos_HostSpace.hpp>
-#include <impl/Kokkos_HBWAllocators.hpp>
 
 /*--------------------------------------------------------------------------*/
 #ifdef KOKKOS_HAVE_HBWSPACE
@@ -148,11 +147,14 @@ public:
   void deallocate( void * const arg_alloc_ptr 
                  , const size_t arg_alloc_size ) const ;
 
+  /**\brief Return Name of the MemorySpace */
+  static constexpr const char* name();
+
 private:
 
   AllocationMechanism  m_alloc_mech ;
-
-  friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > ;
+  static constexpr const char* m_name = "HBW";
+  friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > ;
 };
 
 } // namespace Experimental
@@ -162,7 +164,6 @@ private:
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 template<>
@@ -239,9 +240,33 @@ public:
 };
 
 } // namespace Impl
-} // namespace Experimental
 } // namespace Kokkos
 
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::HBWSpace , Kokkos::Experimental::HBWSpace >::assignable , "" );
+
+template<>
+struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::Experimental::HBWSpace > {
+  enum { assignable = true };
+  enum { accessible = true };
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::Experimental::HBWSpace , Kokkos::HostSpace> {
+  enum { assignable = false };
+  enum { accessible = true };
+  enum { deepcopy   = true };
+};
+
+}}
+
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp
index 5fe686559a07d63cb4a07bf821203672c1336699..0292dd8a6c73ab56e63d0af528e41e5d676a3f6b 100644
--- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp
+++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp
@@ -50,12 +50,12 @@
 #include <typeinfo>
 
 #include <Kokkos_Core_fwd.hpp>
+#include <Kokkos_Concepts.hpp>
 #include <Kokkos_MemoryTraits.hpp>
 
 #include <impl/Kokkos_Traits.hpp>
 #include <impl/Kokkos_Error.hpp>
-
-#include <impl/KokkosExp_SharedAlloc.hpp>
+#include <impl/Kokkos_SharedAlloc.hpp>
 
 /*--------------------------------------------------------------------------*/
 
@@ -155,20 +155,63 @@ public:
   void deallocate( void * const arg_alloc_ptr 
                  , const size_t arg_alloc_size ) const ;
 
+  /**\brief Return Name of the MemorySpace */
+  static constexpr const char* name();
+
 private:
 
   AllocationMechanism  m_alloc_mech ;
+  static constexpr const char* m_name = "Host";
+  friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ;
+};
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::HostSpace >::assignable , "" );
+
+
+template< typename S >
+struct HostMirror {
+private:
 
-  friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ;
+  // If input execution space can access HostSpace then keep it.
+  // Example: Kokkos::OpenMP can access, Kokkos::Cuda cannot
+  enum { keep_exe = Kokkos::Impl::MemorySpaceAccess
+    < typename S::execution_space::memory_space , Kokkos::HostSpace >
+      ::accessible };
+
+  // If HostSpace can access memory space then keep it.
+  // Example:  Cannot access Kokkos::CudaSpace, can access Kokkos::CudaUVMSpace
+  enum { keep_mem = Kokkos::Impl::MemorySpaceAccess
+    < Kokkos::HostSpace , typename S::memory_space >::accessible };
+
+public:
+
+  typedef typename std::conditional
+    < keep_exe && keep_mem /* Can keep whole space */
+    , S
+    , typename std::conditional
+        < keep_mem /* Can keep memory space, use default Host execution space */
+        , Kokkos::Device< Kokkos::HostSpace::execution_space
+                        , typename S::memory_space >
+        , Kokkos::HostSpace
+        >::type
+    >::type  Space ;
 };
 
+} // namespace Impl
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 template<>
@@ -245,7 +288,6 @@ public:
 };
 
 } // namespace Impl
-} // namespace Experimental
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/Kokkos_Layout.hpp b/lib/kokkos/core/src/Kokkos_Layout.hpp
index c77c33703bdd76161b20c2e5ae59b96c03c4550e..8ffbc8bb03d7cc3ed9693c3c5feb727edbdc4b4c 100644
--- a/lib/kokkos/core/src/Kokkos_Layout.hpp
+++ b/lib/kokkos/core/src/Kokkos_Layout.hpp
@@ -82,7 +82,7 @@ struct LayoutLeft {
   LayoutLeft & operator = ( LayoutLeft && ) = default ;
 
   KOKKOS_INLINE_FUNCTION
-  constexpr
+  explicit constexpr
   LayoutLeft( size_t N0 = 0 , size_t N1 = 0 , size_t N2 = 0 , size_t N3 = 0
             , size_t N4 = 0 , size_t N5 = 0 , size_t N6 = 0 , size_t N7 = 0 )
     : dimension { N0 , N1 , N2 , N3 , N4 , N5 , N6 , N7 } {}
@@ -114,7 +114,7 @@ struct LayoutRight {
   LayoutRight & operator = ( LayoutRight && ) = default ;
 
   KOKKOS_INLINE_FUNCTION
-  constexpr
+  explicit constexpr
   LayoutRight( size_t N0 = 0 , size_t N1 = 0 , size_t N2 = 0 , size_t N3 = 0
              , size_t N4 = 0 , size_t N5 = 0 , size_t N6 = 0 , size_t N7 = 0 )
     : dimension { N0 , N1 , N2 , N3 , N4 , N5 , N6 , N7 } {}
@@ -132,6 +132,11 @@ struct LayoutStride {
   size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ;
   size_t stride[ ARRAY_LAYOUT_MAX_RANK ] ; 
 
+  LayoutStride( LayoutStride const & ) = default ;
+  LayoutStride( LayoutStride && ) = default ;
+  LayoutStride & operator = ( LayoutStride const & ) = default ;
+  LayoutStride & operator = ( LayoutStride && ) = default ;
+
   /** \brief  Compute strides from ordered dimensions.
    *
    *  Values of order uniquely form the set [0..rank)
@@ -164,7 +169,8 @@ struct LayoutStride {
       return tmp ;
     }
 
-  KOKKOS_INLINE_FUNCTION constexpr
+  KOKKOS_INLINE_FUNCTION
+  explicit constexpr
   LayoutStride( size_t N0 = 0 , size_t S0 = 0
               , size_t N1 = 0 , size_t S1 = 0
               , size_t N2 = 0 , size_t S2 = 0
@@ -220,7 +226,7 @@ struct LayoutTileLeft {
   LayoutTileLeft & operator = ( LayoutTileLeft && ) = default ;
 
   KOKKOS_INLINE_FUNCTION
-  constexpr
+  explicit constexpr
   LayoutTileLeft( size_t argN0 = 0 , size_t argN1 = 0 , size_t argN2 = 0 , size_t argN3 = 0
                 , size_t argN4 = 0 , size_t argN5 = 0 , size_t argN6 = 0 , size_t argN7 = 0
                 )
diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp
index 7d1e59af5e473db94a5ed6361bb3d6ee7b9b47e6..fbe699deb8191cd023b8277cdd28d501be37c3aa 100644
--- a/lib/kokkos/core/src/Kokkos_Macros.hpp
+++ b/lib/kokkos/core/src/Kokkos_Macros.hpp
@@ -114,11 +114,11 @@
 #error "#include <cuda.h> did not define CUDA_VERSION"
 #endif
 
-#if ( CUDA_VERSION < 6050 )
-// CUDA supports (inofficially) C++11 in device code starting with
-// version 6.5. This includes auto type and device code internal
+#if ( CUDA_VERSION < 7000 )
+// CUDA supports C++11 in device code starting with
+// version 7.0. This includes auto type and device code internal
 // lambdas.
-#error "Cuda version 6.5 or greater required"
+#error "Cuda version 7.0 or greater required"
 #endif
 
 #if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 )
@@ -127,16 +127,19 @@
 #endif
 
 #ifdef KOKKOS_CUDA_USE_LAMBDA
-#if ( CUDA_VERSION < 7000 )
-// CUDA supports C++11 lambdas generated in host code to be given
-// to the device starting with version 7.5. But the release candidate (7.5.6)
-// still identifies as 7.0
-#error "Cuda version 7.5 or greater required for host-to-device Lambda support"
-#endif
-#if ( CUDA_VERSION < 8000 )
-#define KOKKOS_LAMBDA [=]__device__
+#if ( CUDA_VERSION < 7050 )
+  // CUDA supports C++11 lambdas generated in host code to be given
+  // to the device starting with version 7.5. Note that the release candidate
+  // (7.5.6) still identifies as 7.0 and is therefore rejected by this check.
+  #error "Cuda version 7.5 or greater required for host-to-device Lambda support"
+#endif
+#if ( CUDA_VERSION < 8000 ) && defined(__NVCC__)
+  #define KOKKOS_LAMBDA [=]__device__
 #else
-#define KOKKOS_LAMBDA [=]__host__ __device__
+  #define KOKKOS_LAMBDA [=]__host__ __device__
+  #if defined( KOKKOS_HAVE_CXX1Z )
+    #define KOKKOS_CLASS_LAMBDA        [=,*this] __host__ __device__
+  #endif
 #endif
 #define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
 #endif
@@ -145,7 +148,7 @@
 
 #if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
    // Cuda version 8.0 still needs the functor wrapper
-   #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ )
+   #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
       #define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
    #endif
 #endif
@@ -153,13 +156,12 @@
 /*--------------------------------------------------------------------------*/
 /* Language info: C++, CUDA, OPENMP */
 
-#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
+#if defined( KOKKOS_HAVE_CUDA )
   // Compiling Cuda code to 'ptx'
 
   #define KOKKOS_FORCEINLINE_FUNCTION  __device__  __host__  __forceinline__
   #define KOKKOS_INLINE_FUNCTION       __device__  __host__  inline
   #define KOKKOS_FUNCTION              __device__  __host__
-
 #endif /* #if defined( __CUDA_ARCH__ ) */
 
 #if defined( _OPENMP )
@@ -184,10 +186,12 @@
 
 #else
 #if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
+    #if !defined (KOKKOS_HAVE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
     // CUDA (including version 6.5) does not support giving lambdas as
     // arguments to global functions. Thus its not currently possible
     // to dispatch lambdas from the host.
     #define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
+    #endif
   #endif
 #endif /* #if defined( __NVCC__ ) */
 
@@ -195,7 +199,11 @@
   #define KOKKOS_LAMBDA [=]
 #endif
 
-#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */
+#if defined( KOKKOS_HAVE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
+  #define KOKKOS_CLASS_LAMBDA [=,*this]
+#endif
+
+//#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */
 
 /* Intel compiler for host code */
 
@@ -243,7 +251,7 @@
   #endif
 #endif
 
-#endif /* #if ! defined( __CUDA_ARCH__ ) */
+//#endif /* #if ! defined( __CUDA_ARCH__ ) */
 
 /*--------------------------------------------------------------------------*/
 /*--------------------------------------------------------------------------*/
@@ -257,6 +265,20 @@
   #define KOKKOS_HAVE_PRAGMA_VECTOR 1
   #define KOKKOS_HAVE_PRAGMA_SIMD 1
 
+  #define KOKKOS_RESTRICT __restrict__
+
+  #ifndef KOKKOS_ALIGN
+  #define KOKKOS_ALIGN(size) __attribute__((aligned(size)))
+  #endif
+
+  #ifndef KOKKOS_ALIGN_PTR
+  #define KOKKOS_ALIGN_PTR(size) __attribute__((align_value(size)))
+  #endif
+
+  #ifndef KOKKOS_ALIGN_SIZE
+  #define KOKKOS_ALIGN_SIZE 64
+  #endif
+
   #if ( 1400 > KOKKOS_COMPILER_INTEL )
     #if ( 1300 > KOKKOS_COMPILER_INTEL )
       #error "Compiling with Intel version earlier than 13.0 is not supported. Official minimal version is 14.0."
@@ -264,11 +286,11 @@
       #warning "Compiling with Intel version 13.x probably works but is not officially supported. Official minimal version is 14.0."
     #endif
   #endif
-  #if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_ENABLE_ASM ) && ! defined( _WIN32 )
+  #if ! defined( KOKKOS_ENABLE_ASM ) && ! defined( _WIN32 )
     #define KOKKOS_ENABLE_ASM 1
   #endif
 
-  #if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_FORCEINLINE_FUNCTION )
+  #if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
     #if !defined (_WIN32)
       #define KOKKOS_FORCEINLINE_FUNCTION  inline __attribute__((always_inline))
     #else
@@ -335,14 +357,11 @@
     #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
   #endif
 
-  #if ! defined( KOKKOS_ENABLE_ASM ) && \
-      ! ( defined( __powerpc) || \
-          defined(__powerpc__) || \
-          defined(__powerpc64__) || \
-          defined(__POWERPC__) || \
-          defined(__ppc__) || \
-          defined(__ppc64__) || \
-          defined(__PGIC__) )
+  #if ! defined( KOKKOS_ENABLE_ASM ) && ! defined( __PGIC__ ) && \
+      ( defined( __amd64 ) || \
+        defined( __amd64__ ) || \
+        defined( __x86_64 ) || \
+        defined( __x86_64__ ) )
     #define KOKKOS_ENABLE_ASM 1
   #endif
 
@@ -385,10 +404,30 @@
 #define KOKKOS_FUNCTION /**/
 #endif
 
+
+//----------------------------------------------------------------------------
+/** Define empty macro for restrict if necessary: */
+
+#if ! defined(KOKKOS_RESTRICT)
+#define KOKKOS_RESTRICT
+#endif
+
 //----------------------------------------------------------------------------
 /** Define Macro for alignment: */
+#if ! defined KOKKOS_ALIGN_SIZE
+#define KOKKOS_ALIGN_SIZE 16
+#endif
+
+#if ! defined(KOKKOS_ALIGN)
+#define KOKKOS_ALIGN(size) __attribute__((aligned(size)))
+#endif
+
+#if ! defined(KOKKOS_ALIGN_PTR)
+#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size)))
+#endif
+
 #if ! defined(KOKKOS_ALIGN_16)
-#define KOKKOS_ALIGN_16 __attribute__((aligned(16)))
+#define KOKKOS_ALIGN_16 KOKKOS_ALIGN(16)
 #endif
 
 //----------------------------------------------------------------------------
@@ -456,10 +495,6 @@
  * are no longer supported.
  */
 
-#if defined( KOKKOS_USING_DEPRECATED_VIEW )
-#error "Kokkos deprecated View has been removed"
-#endif
-
 #define KOKKOS_USING_EXP_VIEW 1
 #define KOKKOS_USING_EXPERIMENTAL_VIEW
 
diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp
index d843f7c9a1442f9ce1a268c04bf6395f28ed94c7..e4f895b7d310f048f9ca20b6fb2688b9fede93c8 100644
--- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp
+++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp
@@ -49,7 +49,7 @@
 #include <Kokkos_Atomic.hpp>
 #include <impl/Kokkos_BitOps.hpp>
 #include <impl/Kokkos_Error.hpp>
-#include <impl/KokkosExp_SharedAlloc.hpp>
+#include <impl/Kokkos_SharedAlloc.hpp>
 
 #include <limits>
 #include <algorithm>
@@ -70,12 +70,6 @@
 //#define KOKKOS_MEMPOOL_PRINT_PAGE_INFO
 //#define KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
 
-// A superblock is considered full when this percentage of its pages are full.
-#define KOKKOS_MEMPOOL_SB_FULL_FRACTION 0.80
-
-// A page is considered full when this percentage of its blocks are full.
-#define KOKKOS_MEMPOOL_PAGE_FULL_FRACTION 0.875  // 28 / 32
-
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
@@ -128,7 +122,7 @@ struct bitset_count
   { dst += src; }
 
   KOKKOS_INLINE_FUNCTION
-  void operator()( size_type i, value_type & count) const
+  void operator()( size_type i, value_type & count ) const
   {
     count += Kokkos::Impl::bit_count( m_words[i] );
   }
@@ -183,7 +177,7 @@ public:
 
   size_type count() const
   {
-    size_type val;
+    size_type val = 0;
     bitset_count< Bitset > bc( m_words, m_num_words, val );
     return val;
   }
@@ -232,6 +226,20 @@ public:
     return atomic_fetch_and( &m_words[ word_pos ], ~mask ) & mask;
   }
 
+  KOKKOS_FORCEINLINE_FUNCTION
+  Kokkos::pair< bool, word_type >  // ( bit i was clear before this call, word value before this call )
+  fetch_word_set( size_type i ) const  // Atomically sets bit i of the bitset.
+  {
+    size_type word_pos = i >> LG_WORD_SIZE;  // index of the word that holds bit i
+    word_type mask = word_type(1) << ( i & WORD_MASK );  // single-bit mask inside that word
+    // The value returned by atomic_fetch_or is used as the word's contents before the OR.
+    Kokkos::pair<bool, word_type> result;
+    result.second = atomic_fetch_or( &m_words[ word_pos ], mask );
+    result.first = !( result.second & mask );  // true only when this call changed the bit
+
+    return result;
+  }
+
   KOKKOS_FORCEINLINE_FUNCTION
   Kokkos::pair< bool, word_type >
   fetch_word_reset( size_type i ) const
@@ -247,12 +255,10 @@ public:
   }
 
   KOKKOS_FORCEINLINE_FUNCTION
-  Kokkos::pair< bool, size_type >
-  set_any_in_word( size_type i, word_type & prev_val ) const
+  Kokkos::pair< bool, word_type >
+  set_any_in_word( size_type & pos ) const
   {
-    prev_val = 0;
-
-    size_type word_pos = i >> LG_WORD_SIZE;
+    size_type word_pos = pos >> LG_WORD_SIZE;
     word_type word = volatile_load( &m_words[ word_pos ] );
 
     // Loop until there are no more unset bits in the word.
@@ -261,28 +267,26 @@ public:
       size_type bit = Kokkos::Impl::bit_scan_forward( ~word );
 
       // Try to set the bit.
-			word_type mask = word_type(1) << bit;
+      word_type mask = word_type(1) << bit;
       word = atomic_fetch_or( &m_words[ word_pos ], mask );
 
       if ( !( word & mask ) ) {
         // Successfully set the bit.
-        prev_val = word;
+        pos = ( word_pos << LG_WORD_SIZE ) + bit;
 
-        return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit );
+        return Kokkos::pair<bool, word_type>( true, word );
       }
     }
 
     // Didn't find a free bit in this word.
-    return Kokkos::pair<bool, size_type>( false, i );
+    return Kokkos::pair<bool, word_type>( false, word_type(0) );
   }
 
   KOKKOS_FORCEINLINE_FUNCTION
-  Kokkos::pair< bool, size_type >
-  set_any_in_word( size_type i, word_type & prev_val, word_type word_mask ) const
+  Kokkos::pair< bool, word_type >
+  set_any_in_word( size_type & pos, word_type word_mask ) const
   {
-    prev_val = 0;
-
-    size_type word_pos = i >> LG_WORD_SIZE;
+    size_type word_pos = pos >> LG_WORD_SIZE;
     word_type word = volatile_load( &m_words[ word_pos ] );
     word = ( ~word ) & word_mask;
 
@@ -292,30 +296,28 @@ public:
       size_type bit = Kokkos::Impl::bit_scan_forward( word );
 
       // Try to set the bit.
-			word_type mask = word_type(1) << bit;
+      word_type mask = word_type(1) << bit;
       word = atomic_fetch_or( &m_words[ word_pos ], mask );
 
       if ( !( word & mask ) ) {
         // Successfully set the bit.
-        prev_val = word;
+        pos = ( word_pos << LG_WORD_SIZE ) + bit;
 
-        return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit );
+        return Kokkos::pair<bool, word_type>( true, word );
       }
 
       word = ( ~word ) & word_mask;
     }
 
     // Didn't find a free bit in this word.
-    return Kokkos::pair<bool, size_type>( false, i );
+    return Kokkos::pair<bool, word_type>( false, word_type(0) );
   }
 
   KOKKOS_FORCEINLINE_FUNCTION
-  Kokkos::pair< bool, size_type >
-  reset_any_in_word( size_type i, word_type & prev_val ) const
+  Kokkos::pair< bool, word_type >
+  reset_any_in_word( size_type & pos ) const
   {
-    prev_val = 0;
-
-    size_type word_pos = i >> LG_WORD_SIZE;
+    size_type word_pos = pos >> LG_WORD_SIZE;
     word_type word = volatile_load( &m_words[ word_pos ] );
 
     // Loop until there are no more set bits in the word.
@@ -324,28 +326,26 @@ public:
       size_type bit = Kokkos::Impl::bit_scan_forward( word );
 
       // Try to reset the bit.
-			word_type mask = word_type(1) << bit;
+      word_type mask = word_type(1) << bit;
       word = atomic_fetch_and( &m_words[ word_pos ], ~mask );
 
       if ( word & mask ) {
         // Successfully reset the bit.
-        prev_val = word;
+        pos = ( word_pos << LG_WORD_SIZE ) + bit;
 
-        return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit );
+        return Kokkos::pair<bool, word_type>( true, word );
       }
     }
 
     // Didn't find a free bit in this word.
-    return Kokkos::pair<bool, size_type>( false, i );
+    return Kokkos::pair<bool, word_type>( false, word_type(0) );
   }
 
   KOKKOS_FORCEINLINE_FUNCTION
-  Kokkos::pair< bool, size_type >
-  reset_any_in_word( size_type i, word_type & prev_val, word_type word_mask ) const
+  Kokkos::pair< bool, word_type >
+  reset_any_in_word( size_type & pos, word_type word_mask ) const
   {
-    prev_val = 0;
-
-    size_type word_pos = i >> LG_WORD_SIZE;
+    size_type word_pos = pos >> LG_WORD_SIZE;
     word_type word = volatile_load( &m_words[ word_pos ] );
     word = word & word_mask;
 
@@ -355,21 +355,21 @@ public:
       size_type bit = Kokkos::Impl::bit_scan_forward( word );
 
       // Try to reset the bit.
-			word_type mask = word_type(1) << bit;
+      word_type mask = word_type(1) << bit;
       word = atomic_fetch_and( &m_words[ word_pos ], ~mask );
 
       if ( word & mask ) {
         // Successfully reset the bit.
-        prev_val = word;
+        pos = ( word_pos << LG_WORD_SIZE ) + bit;
 
-        return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit );
+        return Kokkos::pair<bool, word_type>( true, word );
       }
 
       word = word & word_mask;
     }
 
     // Didn't find a free bit in this word.
-    return Kokkos::pair<bool, size_type>( false, i );
+    return Kokkos::pair<bool, word_type>( false, word_type(0) );
   }
 };
 
@@ -442,7 +442,7 @@ struct create_histogram {
 
         total_allocated_blocks += page_allocated_blocks;
 
-        atomic_fetch_add( &m_page_histogram(page_allocated_blocks), 1 );
+        atomic_increment( &m_page_histogram(page_allocated_blocks) );
       }
 
       r.first += double(total_allocated_blocks) / blocks_per_sb;
@@ -609,7 +609,7 @@ public:
   };
 
 private:
-  typedef Impl::SharedAllocationTracker            Tracker;
+  typedef Kokkos::Impl::SharedAllocationTracker    Tracker;
   typedef View< uint32_t *, device_type >          UInt32View;
   typedef View< SuperblockHeader *, device_type >  SBHeaderView;
 
@@ -726,11 +726,11 @@ public:
 
     // Allocate memory for Views.  This is done here instead of at construction
     // so that the runtime checks can be performed before allocating memory.
-    resize(m_active, m_num_block_size );
-    resize(m_sb_header, m_num_sb );
+    resize( m_active, m_num_block_size );
+    resize( m_sb_header, m_num_sb );
 
     // Allocate superblock memory.
-    typedef Impl::SharedAllocationRecord< backend_memory_space, void >  SharedRecord;
+    typedef Kokkos::Impl::SharedAllocationRecord< backend_memory_space, void >  SharedRecord;
     SharedRecord * rec =
       SharedRecord::allocate( memspace, "mempool", m_total_size );
 
@@ -751,10 +751,15 @@ public:
                         m_ceil_num_sb * m_num_block_size );
 
     // Initialize all active superblocks to be invalid.
-    typename UInt32View::HostMirror host_active = create_mirror_view(m_active);
-    for (size_t i = 0; i < m_num_block_size; ++i) host_active(i) = INVALID_SUPERBLOCK;
+    typename UInt32View::HostMirror host_active = create_mirror_view( m_active );
+    for ( size_t i = 0; i < m_num_block_size; ++i ) host_active(i) = INVALID_SUPERBLOCK;
+    deep_copy( m_active, host_active );
+
+    // A superblock is considered full when this fraction of its pages is full.
+    const double superblock_full_fraction = .8;
 
-    deep_copy(m_active, host_active);
+    // A page is considered full when this fraction of its blocks is full (0.875 = 28 / 32).
+    const double page_full_fraction = .875;
 
     // Initialize the blocksize info.
     for ( size_t i = 0; i < m_num_block_size; ++i ) {
@@ -767,7 +772,7 @@ public:
 
       // Set the full level for the superblock.
       m_blocksize_info[i].m_sb_full_level =
-        static_cast<uint32_t>( pages_per_sb * KOKKOS_MEMPOOL_SB_FULL_FRACTION );
+        static_cast<uint32_t>( pages_per_sb * superblock_full_fraction );
 
       if ( m_blocksize_info[i].m_sb_full_level == 0 ) {
         m_blocksize_info[i].m_sb_full_level = 1;
@@ -778,7 +783,7 @@ public:
         blocks_per_sb < BLOCKS_PER_PAGE ? blocks_per_sb : BLOCKS_PER_PAGE;
 
       m_blocksize_info[i].m_page_full_level =
-        static_cast<uint32_t>( blocks_per_page * KOKKOS_MEMPOOL_PAGE_FULL_FRACTION );
+        static_cast<uint32_t>( blocks_per_page * page_full_fraction );
 
       if ( m_blocksize_info[i].m_page_full_level == 0 ) {
         m_blocksize_info[i].m_page_full_level = 1;
@@ -820,7 +825,7 @@ public:
   /// \brief  The actual block size allocated given alloc_size.
   KOKKOS_INLINE_FUNCTION
   size_t allocate_block_size( const size_t alloc_size ) const
-  { return size_t(1) << ( get_block_size_index( alloc_size ) + LG_MIN_BLOCK_SIZE); }
+  { return size_t(1) << ( get_block_size_index( alloc_size ) + LG_MIN_BLOCK_SIZE ); }
 
   /// \brief Allocate a chunk of memory.
   /// \param alloc_size Size of the requested allocated in number of bytes.
@@ -834,27 +839,41 @@ public:
 
     // Only support allocations up to the superblock size.  Just return 0
     // (failed allocation) for any size above this.
-    if (alloc_size <= m_sb_size )
+    if ( alloc_size <= m_sb_size )
     {
       int block_size_id = get_block_size_index( alloc_size );
       uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb;
       uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb;
+
+#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
+      // Without this test it looks like pages_per_sb might come back wrong.
+      if ( pages_per_sb == 0 ) return NULL;
+#endif
+
       unsigned word_size = blocks_per_sb > 32 ? 32 : blocks_per_sb;
       unsigned word_mask = ( uint64_t(1) << word_size ) - 1;
 
+      // Instead of forcing an atomic read to guarantee the updated value,
+      // reading the old value is actually beneficial because more threads will
+      // attempt allocations on the old active superblock instead of waiting on
+      // the new active superblock.  This will help hide the latency of
+      // switching the active superblock.
       uint32_t sb_id = volatile_load( &m_active(block_size_id) );
 
-      // If the active is locked, keep reading it until the lock is released.
+      // If the active is locked, keep reading it atomically until the lock is
+      // released.
       while ( sb_id == SUPERBLOCK_LOCK ) {
-        sb_id = volatile_load( &m_active(block_size_id) );
+        sb_id = atomic_fetch_or( &m_active(block_size_id), uint32_t(0) );
       }
 
+      load_fence();
+
       bool allocation_done = false;
 
-      while (!allocation_done) {
+      while ( !allocation_done ) {
         bool need_new_sb = false;
 
-        if (sb_id != INVALID_SUPERBLOCK) {
+        if ( sb_id != INVALID_SUPERBLOCK ) {
           // Use the value from the clock register as the hash value.
           uint64_t hash_val = get_clock_register();
 
@@ -875,12 +894,11 @@ public:
 
           bool search_done = false;
 
-          while (!search_done) {
-            bool success;
-            unsigned prev_val;
+          while ( !search_done ) {
+            bool success = false;
+            unsigned prev_val = 0;
 
-            Kokkos::tie( success, pos ) =
-              m_sb_blocks.set_any_in_word( pos, prev_val, word_mask );
+            Kokkos::tie( success, prev_val ) = m_sb_blocks.set_any_in_word( pos, word_mask );
 
             if ( !success ) {
               if ( ++pages_searched >= pages_per_sb ) {
@@ -905,6 +923,8 @@ public:
             }
             else {
               // Reserved a memory location to allocate.
+              memory_fence();
+
               search_done = true;
               allocation_done = true;
 
@@ -918,7 +938,7 @@ public:
               if ( used_bits == 0 ) {
                 // This page was empty.  Decrement the number of empty pages for
                 // the superblock.
-                atomic_fetch_sub( &m_sb_header(sb_id).m_empty_pages, 1 );
+                atomic_decrement( &m_sb_header(sb_id).m_empty_pages );
               }
               else if ( used_bits == m_blocksize_info[block_size_id].m_page_full_level - 1 )
               {
@@ -962,7 +982,7 @@ public:
 #ifdef KOKKOS_MEMPOOL_PRINT_INFO
     else {
       printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n",
-              alloc_size, m_sb_size);
+              alloc_size, m_sb_size );
 #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
       fflush( stdout );
 #endif
@@ -997,8 +1017,10 @@ public:
       uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE;
       uint32_t pos_rel = offset >> lg_block_size;
 
-      bool success;
-      unsigned prev_val;
+      bool success = false;
+      unsigned prev_val = 0;
+
+      memory_fence();
 
       Kokkos::tie( success, prev_val ) = m_sb_blocks.fetch_word_reset( pos_base + pos_rel );
 
@@ -1023,7 +1045,7 @@ public:
               volatile_store( &m_sb_header(sb_id).m_empty_pages, uint32_t(0) );
               volatile_store( &m_sb_header(sb_id).m_lg_block_size, uint32_t(0) );
 
-              memory_fence();
+              store_fence();
 
               m_empty_sb.set( sb_id );
             }
@@ -1088,7 +1110,7 @@ public:
     printf( "\n" );
 
 #ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
-    typename SBHeaderView::HostMirror host_sb_header = create_mirror_view(m_sb_header);
+    typename SBHeaderView::HostMirror host_sb_header = create_mirror_view( m_sb_header );
     deep_copy( host_sb_header, m_sb_header );
 
     UInt32View num_allocated_blocks( "Allocated Blocks", m_num_sb );
@@ -1101,7 +1123,7 @@ public:
     }
 
     typename UInt32View::HostMirror host_num_allocated_blocks =
-      create_mirror_view(num_allocated_blocks);
+      create_mirror_view( num_allocated_blocks );
     deep_copy( host_num_allocated_blocks, num_allocated_blocks );
 
     // Print header info of all superblocks.
@@ -1135,7 +1157,7 @@ public:
              m_lg_max_sb_blocks, LG_MIN_BLOCK_SIZE, BLOCKS_PER_PAGE, result );
     }
 
-    typename UInt32View::HostMirror host_page_histogram = create_mirror_view(page_histogram);
+    typename UInt32View::HostMirror host_page_histogram = create_mirror_view( page_histogram );
     deep_copy( host_page_histogram, page_histogram );
 
     // Find the used and total pages and blocks.
@@ -1158,8 +1180,8 @@ public:
     double percent_used_blocks = total_blocks == 0 ? 0.0 : double(used_blocks) / total_blocks;
 
     // Count active superblocks.
-    typename UInt32View::HostMirror host_active = create_mirror_view(m_active);
-    deep_copy(host_active, m_active);
+    typename UInt32View::HostMirror host_active = create_mirror_view( m_active );
+    deep_copy( host_active, m_active );
 
     unsigned num_active_sb = 0;
     for ( size_t i = 0; i < m_num_block_size; ++i ) {
@@ -1224,6 +1246,7 @@ public:
     // Print the blocks used for each page of a few individual superblocks.
     for ( uint32_t i = 0; i < num_sb_id; ++i ) {
       uint32_t lg_block_size = host_sb_header(sb_id[i]).m_lg_block_size;
+
       if ( lg_block_size != 0 ) {
         printf( "SB_ID    BLOCK ID    USED_BLOCKS\n" );
 
@@ -1249,16 +1272,16 @@ public:
 #endif
 
     printf( "   Used blocks: %10u / %10u = %10.6lf\n", used_blocks, total_blocks,
-           percent_used_blocks );
+            percent_used_blocks );
     printf( "    Used pages: %10u / %10u = %10.6lf\n", used_pages, total_pages,
-           percent_used_pages );
+            percent_used_pages );
     printf( "       Used SB: %10zu / %10zu = %10.6lf\n", m_num_sb - num_empty_sb, m_num_sb,
-           percent_used_sb );
+            percent_used_sb );
     printf( "     Active SB: %10u\n", num_active_sb );
     printf( "      Empty SB: %10u\n", num_empty_sb );
     printf( "   Partfull SB: %10u\n", num_partfull_sb );
     printf( "       Full SB: %10lu\n",
-           m_num_sb - num_active_sb - num_empty_sb - num_partfull_sb );
+            m_num_sb - num_active_sb - num_empty_sb - num_partfull_sb );
     printf( "Ave. SB Full %%: %10.6lf\n", ave_sb_full );
     printf( "\n" );
     fflush( stdout );
@@ -1316,6 +1339,8 @@ private:
     uint32_t lock_sb =
       Kokkos::atomic_compare_exchange( &m_active(block_size_id), old_sb, SUPERBLOCK_LOCK );
 
+    load_fence();
+
     // Initialize the new superblock to be the previous one so the previous
     // superblock is returned if a new superblock can't be found.
     uint32_t new_sb = lock_sb;
@@ -1334,11 +1359,11 @@ private:
       // size's bitset.
       unsigned pos = block_size_id * m_ceil_num_sb;
 
-      while (!search_done) {
+      while ( !search_done ) {
         bool success = false;
-        unsigned prev_val;
+        unsigned prev_val = 0;
 
-        Kokkos::tie( success, pos ) = m_partfull_sb.reset_any_in_word( pos, prev_val );
+        Kokkos::tie( success, prev_val ) = m_partfull_sb.reset_any_in_word( pos );
 
         if ( !success ) {
           if ( ++tries >= max_tries ) {
@@ -1351,22 +1376,21 @@ private:
         }
         else {
           // Found a superblock.
+
+          // It is possible that the newly found superblock is the same as the
+          // old superblock.  In this case putting the old value back in yields
+          // correct behavior.  This could happen as follows.  This thread
+          // grabs the lock and transitions the superblock to the full state.
+          // Before it searches for a new superblock, other threads perform
+          // enough deallocations to transition the superblock to the partially
+          // full state.  This thread then searches for a partially full
+          // superblock and finds the one it removed.  There's potential for
+          // this to cause a performance issue if the same superblock keeps
+          // being removed and added due to the right mix and ordering of
+          // allocations and deallocations.
           search_done = true;
           new_sb = pos - block_size_id * m_ceil_num_sb;
 
-          // Assertions:
-          //   1. A different superblock than the current should be found.
-#ifdef KOKKOS_MEMPOOL_PRINTERR
-          if ( new_sb == lock_sb ) {
-            printf( "\n** MemoryPool::find_superblock() FOUND_SAME_SUPERBLOCK: %u **\n",
-                    new_sb);
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-            fflush( stdout );
-#endif
-            Kokkos::abort( "" );
-          }
-#endif
-
           // Set the head status for the superblock.
           volatile_store( &m_sb_header(new_sb).m_is_active, uint32_t(true) );
 
@@ -1376,7 +1400,7 @@ private:
             volatile_store( &m_sb_header(lock_sb).m_is_active, uint32_t(false) );
           }
 
-          memory_fence();
+          store_fence();
         }
       }
 
@@ -1389,11 +1413,11 @@ private:
         // size's bitset.
         pos = 0;
 
-        while (!search_done) {
+        while ( !search_done ) {
           bool success = false;
-          unsigned prev_val;
+          unsigned prev_val = 0;
 
-          Kokkos::tie( success, pos ) = m_empty_sb.reset_any_in_word( pos, prev_val );
+          Kokkos::tie( success, prev_val ) = m_empty_sb.reset_any_in_word( pos );
 
           if ( !success ) {
             if ( ++tries >= max_tries ) {
@@ -1406,22 +1430,22 @@ private:
           }
           else {
             // Found a superblock.
+
+            // It is possible that the newly found superblock is the same as
+            // the old superblock.  In this case putting the old value back in
+            // yields correct behavior.  This could happen as follows.  This
+            // thread grabs the lock and transitions the superblock to the full
+            // state.  Before it searches for a new superblock, other threads
+            // perform enough deallocations to transition the superblock to the
+            // partially full state and then the empty state.  This thread then
+            // searches for a partially full superblock and none exist.  This
+            // thread then searches for an empty superblock and finds the one
+            // it removed.  The likelihood of this happening is so remote that
+            // the potential for this to cause a performance issue is
+            // infinitesimal.
             search_done = true;
             new_sb = pos;
 
-            // Assertions:
-            //   1. A different superblock than the current should be found.
-#ifdef KOKKOS_MEMPOOL_PRINTERR
-            if ( new_sb == lock_sb ) {
-              printf( "\n** MemoryPool::find_superblock() FOUND_SAME_SUPERBLOCK: %u **\n",
-                      new_sb);
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-              fflush( stdout );
-#endif
-              Kokkos::abort( "" );
-            }
-#endif
-
             // Set the empty pages, block size, and head status for the
             // superblock.
             volatile_store( &m_sb_header(new_sb).m_empty_pages,
@@ -1436,7 +1460,7 @@ private:
               volatile_store( &m_sb_header(lock_sb).m_is_active, uint32_t(false) );
             }
 
-            memory_fence();
+            store_fence();
           }
         }
       }
@@ -1445,14 +1469,17 @@ private:
       atomic_exchange( &m_active(block_size_id), new_sb );
     }
     else {
-      // Either another thread has the lock and is switching the active superblock for
-      // this block size or another thread has already changed the active superblock
-      // since this thread read its value.  Keep reading the active superblock until
-      // it isn't locked to get the new active superblock.
+      // Either another thread has the lock and is switching the active
+      // superblock for this block size or another thread has already changed
+      // the active superblock since this thread read its value.  Keep
+      // atomically reading the active superblock until it isn't locked to get
+      // the new active superblock.
       do {
-        new_sb = volatile_load( &m_active(block_size_id) );
+        new_sb = atomic_fetch_or( &m_active(block_size_id), uint32_t(0) );
       } while ( new_sb == SUPERBLOCK_LOCK );
 
+      load_fence();
+
       // Assertions:
       //   1. An invalid superblock should never be found here.
       //   2. If the new superblock is the same as the previous superblock, the
@@ -1477,14 +1504,25 @@ private:
   {
 #if defined( __CUDA_ARCH__ )
     // Return value of 64-bit hi-res clock register.
-	  return clock64();
+    return clock64();
 #elif defined( __i386__ ) || defined( __x86_64 )
     // Return value of 64-bit hi-res clock register.
-    unsigned a, d;
-    __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));
-    return ( (uint64_t) a) | ( ( (uint64_t) d ) << 32 );
+    unsigned a = 0, d = 0;
+
+    __asm__ volatile( "rdtsc" : "=a" (a), "=d" (d) );
+
+    return ( (uint64_t) a ) | ( ( (uint64_t) d ) << 32 );
+#elif defined( __powerpc )   || defined( __powerpc__ ) || defined( __powerpc64__ ) || \
+      defined( __POWERPC__ ) || defined( __ppc__ )     || defined( __ppc64__ )
+  unsigned int cycles = 0;
+
+  asm volatile( "mftb %0" : "=r" (cycles) );
+
+  return (uint64_t) cycles;
 #else
-    const uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+    const uint64_t ticks =
+      std::chrono::high_resolution_clock::now().time_since_epoch().count();
+
     return ticks;
 #endif
   }
@@ -1517,7 +1555,4 @@ private:
 #undef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
 #endif
 
-#undef KOKKOS_MEMPOOL_SB_FULL_FRACTION
-#undef KOKKOS_MEMPOOL_PAGE_FULL_FRACTION
-
 #endif // KOKKOS_MEMORYPOOL_HPP
diff --git a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp
index 5ee1f16fec854fc0ee45e39c488095fdee73ed4f..94b58b8affe1921f2bfa9faf1e25b3dc303c5220 100644
--- a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp
+++ b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp
@@ -63,6 +63,8 @@ enum MemoryTraitsFlags
   { Unmanaged  = 0x01
   , RandomAccess = 0x02
   , Atomic = 0x04
+  , Restrict = 0x08
+  , Aligned = 0x10
   };
 
 template < unsigned T >
@@ -73,6 +75,8 @@ struct MemoryTraits {
   enum { Unmanaged    = T & unsigned(Kokkos::Unmanaged) };
   enum { RandomAccess = T & unsigned(Kokkos::RandomAccess) };
   enum { Atomic       = T & unsigned(Kokkos::Atomic) };
+  enum { Restrict     = T & unsigned(Kokkos::Restrict) };
+  enum { Aligned      = T & unsigned(Kokkos::Aligned) };
 
 };
 
diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp
index 7be4f8245f98ea464d8a27313c13c7aa35be4e46..0e6c6d84fe5199f3ea9a554e604d49c3d71c1380 100644
--- a/lib/kokkos/core/src/Kokkos_OpenMP.hpp
+++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp
@@ -58,7 +58,7 @@
 #endif
 #include <Kokkos_ScratchSpace.hpp>
 #include <Kokkos_Parallel.hpp>
-#include <Kokkos_TaskPolicy.hpp>
+#include <Kokkos_TaskScheduler.hpp>
 #include <Kokkos_Layout.hpp>
 #include <impl/Kokkos_Tags.hpp>
 
@@ -160,6 +160,17 @@ public:
 namespace Kokkos {
 namespace Impl {
 
+template<>  // Access traits for the pair ( OpenMP::memory_space, OpenMP::scratch_memory_space ).
+struct MemorySpaceAccess 
+  < Kokkos::OpenMP::memory_space
+  , Kokkos::OpenMP::scratch_memory_space
+  >
+{
+  enum { assignable = false };  // NOTE(review): flag meanings come from the primary template - confirm there
+  enum { accessible = true };
+  enum { deepcopy   = false };
+};
+
 template<>
 struct VerifyExecutionCanAccessMemorySpace
   < Kokkos::OpenMP::memory_space
diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
index 695bc79a1ab900405a160843d8777651dc63cb22..3a73e8a8170fe3729500adbc263137856378170f 100644
--- a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
+++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
@@ -53,7 +53,8 @@ struct is_reducer_type {
 
 template<class T>
 struct is_reducer_type<T,typename std::enable_if<
-                       std::is_same<T,typename T::reducer_type>::value
+                       std::is_same<typename std::remove_cv<T>::type,
+                                    typename std::remove_cv<typename T::reducer_type>::type>::value
                       >::type> {
   enum { value = 1 };
 };
@@ -726,6 +727,119 @@ public:
   }
 };
 
+template<class Scalar>  // Pair of values used as the value_type of the MinMax reducer.
+struct MinMaxScalar {
+  Scalar min_val,max_val;  // minimum and maximum, respectively
+  // Member-wise assignment; returns void, so assignments cannot be chained.
+  KOKKOS_INLINE_FUNCTION
+  void operator = (const MinMaxScalar& rhs) {
+    min_val = rhs.min_val;
+    max_val = rhs.max_val;
+  }
+  // Same member-wise assignment for volatile-qualified objects.
+  KOKKOS_INLINE_FUNCTION
+  void operator = (const volatile MinMaxScalar& rhs) volatile {
+    min_val = rhs.min_val;
+    max_val = rhs.max_val;
+  }
+};
+
+template<class Scalar, class Space = HostSpace>
+struct MinMax {  // Reducer tracking both the smallest and the largest value seen (see join()).
+private:
+  typedef typename std::remove_cv<Scalar>::type scalar_type;
+  // scalar_type above is Scalar with const/volatile qualifiers removed.
+public:
+  //Required
+  typedef MinMax reducer_type;
+  typedef MinMaxScalar<scalar_type> value_type;
+
+  typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
+  // Starting values that init() copies into min_val / max_val.
+  scalar_type min_init_value;
+  scalar_type max_init_value;
+
+private:
+  result_view_type result;
+  // Default starting minimum; the bool parameter selects the arithmetic or the generic variant.
+  template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value >
+  struct MinInitWrapper;
+  // Arithmetic types start the minimum at the largest finite value.
+  template<class ValueType >
+  struct MinInitWrapper<ValueType,true> {
+    static ValueType value() {
+      return std::numeric_limits<scalar_type>::max();
+    }
+  };
+  // Other types fall back to a value-initialized scalar_type.
+  template<class ValueType >
+  struct MinInitWrapper<ValueType,false> {
+    static ValueType value() {
+      return scalar_type();
+    }
+  };
+  // Default starting maximum, selected the same way as MinInitWrapper.
+  template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value >
+  struct MaxInitWrapper;
+  // lowest(), not min(): for floating-point types min() is the smallest positive normalized value.
+  template<class ValueType >
+  struct MaxInitWrapper<ValueType,true> {
+    static ValueType value() {
+      return std::numeric_limits<scalar_type>::lowest();
+    }
+  };
+  // Other types fall back to a value-initialized scalar_type.
+  template<class ValueType >
+  struct MaxInitWrapper<ValueType,false> {
+    static ValueType value() {
+      return scalar_type();
+    }
+  };
+
+public:
+  // The result target is a value_type reference or a View; starting values are default or explicit.
+  MinMax(value_type& result_):
+    min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(&result_) {}
+  MinMax(const result_view_type& result_):
+    min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(result_) {}
+  MinMax(value_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_):
+    min_init_value(min_init_value_),max_init_value(max_init_value_),result(&result_) {}
+  MinMax(const result_view_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_):
+    min_init_value(min_init_value_),max_init_value(max_init_value_),result(result_) {}
+
+  //Required: fold src into dest, keeping the smaller min_val and the larger max_val.
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src)  const {
+    if ( src.min_val < dest.min_val ) {
+      dest.min_val = src.min_val;
+    }
+    if ( src.max_val > dest.max_val ) {
+      dest.max_val = src.max_val;
+    }
+  }
+  // Same fold for volatile-qualified operands.
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    if ( src.min_val < dest.min_val ) {
+      dest.min_val = src.min_val;
+    }
+    if ( src.max_val > dest.max_val ) {
+      dest.max_val = src.max_val;
+    }
+  }
+
+  //Optional: seed val with the configured starting values.
+  KOKKOS_INLINE_FUNCTION
+  void init( value_type& val)  const {
+    val.min_val = min_init_value;
+    val.max_val = max_init_value;
+  }
+  // Returns the View supplied to, or constructed by, the constructor.
+  result_view_type result_view() const {
+    return result;
+  }
+};
+
 template<class Scalar, class Index>
 struct MinMaxLocScalar {
   Scalar min_val,max_val;
@@ -1124,7 +1238,8 @@ void parallel_reduce(const PolicyType& policy,
                      typename Impl::enable_if<
                        Kokkos::Impl::is_execution_policy<PolicyType>::value
                      >::type * = 0) {
-  Impl::ParallelReduceAdaptor<PolicyType,FunctorType,const ReturnType>::execute("",policy,functor,return_value);
+  ReturnType return_value_impl = return_value;
+  Impl::ParallelReduceAdaptor<PolicyType,FunctorType,ReturnType>::execute("",policy,functor,return_value_impl);
 }
 
 template< class FunctorType, class ReturnType >
@@ -1133,8 +1248,8 @@ void parallel_reduce(const size_t& policy,
                      const FunctorType& functor,
                      const ReturnType& return_value) {
   typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type;
-
-  Impl::ParallelReduceAdaptor<policy_type,FunctorType,const ReturnType>::execute("",policy_type(0,policy),functor,return_value);
+  ReturnType return_value_impl = return_value;
+  Impl::ParallelReduceAdaptor<policy_type,FunctorType,ReturnType>::execute("",policy_type(0,policy),functor,return_value_impl);
 }
 
 template< class FunctorType, class ReturnType >
@@ -1144,7 +1259,8 @@ void parallel_reduce(const std::string& label,
                      const FunctorType& functor,
                      const ReturnType& return_value) {
   typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type;
-  Impl::ParallelReduceAdaptor<policy_type,FunctorType,const ReturnType>::execute(label,policy_type(0,policy),functor,return_value);
+  ReturnType return_value_impl = return_value;
+  Impl::ParallelReduceAdaptor<policy_type,FunctorType,ReturnType>::execute(label,policy_type(0,policy),functor,return_value_impl);
 }
 
 // No Return Argument
diff --git a/lib/kokkos/core/src/Kokkos_Qthread.hpp b/lib/kokkos/core/src/Kokkos_Qthread.hpp
index d61f8d518e6641debd19d4975b2535a6bfbcad8f..c58518b0654bb3267a12041a2ab7fef4e2375972 100644
--- a/lib/kokkos/core/src/Kokkos_Qthread.hpp
+++ b/lib/kokkos/core/src/Kokkos_Qthread.hpp
@@ -144,6 +144,17 @@ public:
 namespace Kokkos {
 namespace Impl {
 
+template<>  // Access traits for the pair ( Qthread::memory_space, Qthread::scratch_memory_space ).
+struct MemorySpaceAccess 
+  < Kokkos::Qthread::memory_space
+  , Kokkos::Qthread::scratch_memory_space
+  >
+{
+  enum { assignable = false };  // NOTE(review): flag meanings come from the primary template - confirm there
+  enum { accessible = true };
+  enum { deepcopy   = false };
+};
+
 template<>
 struct VerifyExecutionCanAccessMemorySpace
   < Kokkos::Qthread::memory_space
diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp
index 233b56c93956f7898346780d1bfe327fd11afb03..914edbc7c4640001d95affc7e1e6175b0dfe2de6 100644
--- a/lib/kokkos/core/src/Kokkos_Serial.hpp
+++ b/lib/kokkos/core/src/Kokkos_Serial.hpp
@@ -50,7 +50,7 @@
 #include <cstddef>
 #include <iosfwd>
 #include <Kokkos_Parallel.hpp>
-#include <Kokkos_TaskPolicy.hpp>
+#include <Kokkos_TaskScheduler.hpp>
 #include <Kokkos_Layout.hpp>
 #include <Kokkos_HostSpace.hpp>
 #include <Kokkos_ScratchSpace.hpp>
@@ -59,7 +59,6 @@
 #include <impl/Kokkos_FunctorAdapter.hpp>
 #include <impl/Kokkos_Profiling_Interface.hpp>
 
-
 #include <KokkosExp_MDRangePolicy.hpp>
 
 #if defined( KOKKOS_HAVE_SERIAL )
@@ -192,6 +191,17 @@ public:
 namespace Kokkos {
 namespace Impl {
 
+template<>  // full specialization for the (Serial memory_space, Serial scratch_memory_space) pair
+struct MemorySpaceAccess 
+  < Kokkos::Serial::memory_space
+  , Kokkos::Serial::scratch_memory_space
+  >
+{
+  enum { assignable = false };  // NOTE(review): presumably "views in the two spaces are not assignable" - confirm against the primary template
+  enum { accessible = true };   // the only one of the three flags enabled for this pair
+  enum { deepcopy   = false };  // NOTE(review): presumably "no deep copy between the two spaces" - confirm against the primary template
+};
+
 template<>
 struct VerifyExecutionCanAccessMemorySpace
   < Kokkos::Serial::memory_space
@@ -250,7 +260,6 @@ public:
   const scratch_memory_space & thread_scratch(int) const
     { return m_space ; }
 
-
   KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
   KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
   KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; }
@@ -306,10 +315,9 @@ public:
 
 } // namespace Impl
 
-
 /*
  * < Kokkos::Serial , WorkArgTag >
- * < WorkArgTag , Impl::enable_if< Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type >
+ * < WorkArgTag , Impl::enable_if< std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type >
  *
  */
 namespace Impl {
@@ -402,7 +410,6 @@ public:
     , m_chunk_size ( 32 )
     {}
 
-
   inline int chunk_size() const { return m_chunk_size ; }
 
   /** \brief set chunk_size to a discrete value*/
@@ -525,7 +532,6 @@ private:
   const ReducerType   m_reducer ;
   const pointer_type  m_result_ptr ;
 
-
   template< class TagType >
   inline
   typename std::enable_if< std::is_same< TagType , void >::value >::type
@@ -895,20 +901,22 @@ struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> {
 
 } // namespace Impl
 
-template<typename iType>
+template< typename iType >  // count-only overload: a single index type, no begin/end pair
 KOKKOS_INLINE_FUNCTION
 Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
 TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & count )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,count);
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, count );  // whitespace-only change from the removed line
 }
 
-template<typename iType>
+template< typename iType1, typename iType2 >  // begin and end may now be given with different index types
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
-TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & begin , const iType & end )
+Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
+                                       Impl::SerialTeamMember >
+TeamThreadRange( const Impl::SerialTeamMember& thread, const iType1 & begin, const iType2 & end )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,begin,end);
+  typedef typename std::common_type< iType1, iType2 >::type iType;  // the one index type both bounds are converted to
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, iType(begin), iType(end) );  // NOTE(review): if iType is unsigned (e.g. int paired with size_t) a negative bound wraps to a large value
 }
 
 template<typename iType>
@@ -1113,4 +1121,3 @@ void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const Func
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
-
diff --git a/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp b/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp
index fc9113b75052e91fc260f95725fe360b98e548e8..05ed5103b874d3f8912f1e8ca6e0559967dbd86a 100644
--- a/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp
+++ b/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp
@@ -41,1069 +41,7 @@
 //@HEADER
 */
 
-// Experimental unified task-data parallel manycore LDRD
+// For backward compatibility:
 
-#ifndef KOKKOS_TASKPOLICY_HPP
-#define KOKKOS_TASKPOLICY_HPP
-
-//----------------------------------------------------------------------------
-
-#include <Kokkos_Core_fwd.hpp>
-
-// If compiling with CUDA then must be using CUDA 8 or better
-// and use relocateable device code to enable the task policy.
-// nvcc relocatable device code option: --relocatable-device-code=true
-
-#if ( defined( KOKKOS_COMPILER_NVCC ) )
-  #if ( 8000 <= CUDA_VERSION ) && \
-      defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE )
-
-  #define KOKKOS_ENABLE_TASKPOLICY
-
-  #endif
-#else
-
-#define KOKKOS_ENABLE_TASKPOLICY
-
-#endif
-
-
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
-
-//----------------------------------------------------------------------------
-
-#include <Kokkos_MemoryPool.hpp>
-#include <impl/Kokkos_Tags.hpp>
-#include <impl/Kokkos_TaskQueue.hpp>
-
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-
-enum TaskType { TaskTeam   = Impl::TaskBase<void,void,void>::TaskTeam
-              , TaskSingle = Impl::TaskBase<void,void,void>::TaskSingle };
-
-enum TaskPriority { TaskHighPriority    = 0
-                  , TaskRegularPriority = 1
-                  , TaskLowPriority     = 2 };
-
-template< typename Space >
-class TaskPolicy ;
-
-template< typename Space >
-void wait( TaskPolicy< Space > const & );
-
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-/*\brief  Implementation data for task data management, access, and execution.
- *
- *  CRTP Inheritance structure to allow static_cast from the
- *  task root type and a task's FunctorType.
- *
- *    TaskBase< Space , ResultType , FunctorType >
- *      : TaskBase< Space , ResultType , void >
- *      , FunctorType
- *      { ... };
- *
- *    TaskBase< Space , ResultType , void >
- *      : TaskBase< Space , void , void >
- *      { ... };
- */
-template< typename Space , typename ResultType , typename FunctorType >
-class TaskBase ;
-
-template< typename Space >
-class TaskExec ;
-
-}} // namespace Kokkos::Impl
-
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-
-/**
- *
- *  Future< space >  // value_type == void
- *  Future< value >  // space == Default
- *  Future< value , space >
- *
- */
-template< typename Arg1 /* = void */ , typename Arg2 /* = void */ >
-class Future {
-private:
-
-  template< typename > friend class TaskPolicy ;
-  template< typename , typename > friend class Future ;
-  template< typename , typename , typename > friend class Impl::TaskBase ;
-
-  enum { Arg1_is_space  = Kokkos::Impl::is_space< Arg1 >::value };
-  enum { Arg2_is_space  = Kokkos::Impl::is_space< Arg2 >::value };
-  enum { Arg1_is_value  = ! Arg1_is_space &&
-                          ! std::is_same< Arg1 , void >::value };
-  enum { Arg2_is_value  = ! Arg2_is_space &&
-                          ! std::is_same< Arg2 , void >::value };
-
-  static_assert( ! ( Arg1_is_space && Arg2_is_space )
-               , "Future cannot be given two spaces" );
-
-  static_assert( ! ( Arg1_is_value && Arg2_is_value )
-               , "Future cannot be given two value types" );
-
-  using ValueType =
-    typename std::conditional< Arg1_is_value , Arg1 ,
-    typename std::conditional< Arg2_is_value , Arg2 , void
-    >::type >::type ;
-
-  using Space =
-    typename std::conditional< Arg1_is_space , Arg1 ,
-    typename std::conditional< Arg2_is_space , Arg2 , void
-    >::type >::type ;
-
-  using task_base  = Impl::TaskBase< Space , ValueType , void > ;
-  using queue_type = Impl::TaskQueue< Space > ;
-
-  task_base * m_task ;
-
-  KOKKOS_INLINE_FUNCTION explicit
-  Future( task_base * task ) : m_task(0)
-    { if ( task ) queue_type::assign( & m_task , task ); }
-
-  //----------------------------------------
-
-public:
-
-  using execution_space = typename Space::execution_space ;
-  using value_type      = ValueType ;
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  bool is_null() const { return 0 == m_task ; }
-
-  KOKKOS_INLINE_FUNCTION
-  int reference_count() const
-    { return 0 != m_task ? m_task->reference_count() : 0 ; }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  ~Future() { if ( m_task ) queue_type::assign( & m_task , (task_base*)0 ); }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr Future() noexcept : m_task(0) {}
-
-  KOKKOS_INLINE_FUNCTION
-  Future( Future && rhs )
-    : m_task( rhs.m_task ) { rhs.m_task = 0 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  Future( const Future & rhs )
-    : m_task(0)
-    { if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); }
-
-  KOKKOS_INLINE_FUNCTION
-  Future & operator = ( Future && rhs )
-    {
-      if ( m_task ) queue_type::assign( & m_task , (task_base*)0 );
-      m_task = rhs.m_task ;
-      rhs.m_task = 0 ;
-      return *this ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  Future & operator = ( const Future & rhs )
-    {
-      if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task );
-      return *this ;
-    }
-
-  //----------------------------------------
-
-  template< class A1 , class A2 >
-  KOKKOS_INLINE_FUNCTION
-  Future( Future<A1,A2> && rhs )
-    : m_task( rhs.m_task )
-    {
-      static_assert
-        ( std::is_same< Space , void >::value ||
-          std::is_same< Space , typename Future<A1,A2>::Space >::value
-        , "Assigned Futures must have the same space" );
-
-      static_assert
-        ( std::is_same< value_type , void >::value ||
-          std::is_same< value_type , typename Future<A1,A2>::value_type >::value
-        , "Assigned Futures must have the same value_type" );
-
-      rhs.m_task = 0 ;
-    }
-
-  template< class A1 , class A2 >
-  KOKKOS_INLINE_FUNCTION
-  Future( const Future<A1,A2> & rhs )
-    : m_task(0)
-    {
-      static_assert
-        ( std::is_same< Space , void >::value ||
-          std::is_same< Space , typename Future<A1,A2>::Space >::value
-        , "Assigned Futures must have the same space" );
-
-      static_assert
-        ( std::is_same< value_type , void >::value ||
-          std::is_same< value_type , typename Future<A1,A2>::value_type >::value
-        , "Assigned Futures must have the same value_type" );
-
-      if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task );
-    }
-
-  template< class A1 , class A2 >
-  KOKKOS_INLINE_FUNCTION
-  Future & operator = ( const Future<A1,A2> & rhs )
-    {
-      static_assert
-        ( std::is_same< Space , void >::value ||
-          std::is_same< Space , typename Future<A1,A2>::Space >::value
-        , "Assigned Futures must have the same space" );
-
-      static_assert
-        ( std::is_same< value_type , void >::value ||
-          std::is_same< value_type , typename Future<A1,A2>::value_type >::value
-        , "Assigned Futures must have the same value_type" );
-
-      if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task );
-      return *this ;
-    }
-
-  template< class A1 , class A2 >
-  KOKKOS_INLINE_FUNCTION
-  Future & operator = ( Future<A1,A2> && rhs )
-    {
-      static_assert
-        ( std::is_same< Space , void >::value ||
-          std::is_same< Space , typename Future<A1,A2>::Space >::value
-        , "Assigned Futures must have the same space" );
-
-      static_assert
-        ( std::is_same< value_type , void >::value ||
-          std::is_same< value_type , typename Future<A1,A2>::value_type >::value
-        , "Assigned Futures must have the same value_type" );
-
-      if ( m_task ) queue_type::assign( & m_task , (task_base*) 0 );
-      m_task = rhs.m_task ;
-      rhs.m_task = 0 ;
-      return *this ;
-    }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  typename task_base::get_return_type
-  get() const
-    {
-      if ( 0 == m_task ) {
-        Kokkos::abort( "Kokkos:::Future::get ERROR: is_null()");
-      }
-      return m_task->get();
-    }
-};
-
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-
-template< typename ExecSpace >
-class TaskPolicy
-{
-private:
-
-  using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ;
-  using queue_type = Kokkos::Impl::TaskQueue< ExecSpace > ;
-  using task_base  = Impl::TaskBase< ExecSpace , void , void > ;
-
-  track_type   m_track ;
-  queue_type * m_queue ;
-
-  //----------------------------------------
-  // Process optional arguments to spawn and respawn functions
-
-  KOKKOS_INLINE_FUNCTION static
-  void assign( task_base * const ) {}
-
-  // TaskTeam or TaskSingle
-  template< typename ... Options >
-  KOKKOS_INLINE_FUNCTION static
-  void assign( task_base * const task
-             , TaskType const & arg
-             , Options const & ... opts )
-    {
-      task->m_task_type = arg ;
-      assign( task , opts ... );
-    }
-
-  // TaskHighPriority or TaskRegularPriority or TaskLowPriority
-  template< typename ... Options >
-  KOKKOS_INLINE_FUNCTION static
-  void assign( task_base * const task
-             , TaskPriority const & arg
-             , Options const & ... opts )
-    {
-      task->m_priority = arg ;
-      assign( task , opts ... );
-    }
-
-  // Future for a dependence
-  template< typename A1 , typename A2 , typename ... Options >
-  KOKKOS_INLINE_FUNCTION static
-  void assign( task_base * const task
-             , Future< A1 , A2 > const & arg 
-             , Options const & ... opts )
-    {
-      // Assign dependence to task->m_next
-      // which will be processed within subsequent call to schedule.
-      // Error if the dependence is reset.
-
-      if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) {
-        Kokkos::abort("TaskPolicy ERROR: resetting task dependence");
-      }
-
-      if ( 0 != arg.m_task ) {
-        // The future may be destroyed upon returning from this call
-        // so increment reference count to track this assignment.
-        Kokkos::atomic_fetch_add( &(arg.m_task->m_ref_count) , 1 );
-      }
-
-      assign( task , opts ... );
-    }
-
-  //----------------------------------------
-
-public:
-
-  using execution_policy = TaskPolicy ;
-  using execution_space  = ExecSpace ;
-  using memory_space     = typename queue_type::memory_space ;
-  using member_type      = Kokkos::Impl::TaskExec< ExecSpace > ;
-
-  KOKKOS_INLINE_FUNCTION
-  TaskPolicy() : m_track(), m_queue(0) {}
-
-  KOKKOS_INLINE_FUNCTION
-  TaskPolicy( TaskPolicy && rhs ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  TaskPolicy( TaskPolicy const & rhs ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  TaskPolicy & operator = ( TaskPolicy const & rhs ) = default ;
-
-  TaskPolicy( memory_space const & arg_memory_space
-            , unsigned const arg_memory_pool_capacity
-            , unsigned const arg_memory_pool_log2_superblock = 12 )
-    : m_track()
-    , m_queue(0)
-    {
-      typedef Kokkos::Experimental::Impl::SharedAllocationRecord
-        < memory_space , typename queue_type::Destroy >
-          record_type ;
-
-      record_type * record =
-        record_type::allocate( arg_memory_space
-                             , "TaskQueue"
-                             , sizeof(queue_type)
-                             );
-
-      m_queue = new( record->data() )
-        queue_type( arg_memory_space
-                  , arg_memory_pool_capacity
-                  , arg_memory_pool_log2_superblock );
-
-      record->m_destroy.m_queue = m_queue ;
-
-      m_track.assign_allocated_record_to_uninitialized( record );
-    }
-
-  //----------------------------------------
-  /**\brief  Allocation size for a spawned task */
-  template< typename FunctorType >
-  KOKKOS_FUNCTION
-  size_t spawn_allocation_size() const
-    {
-      using task_type  = Impl::TaskBase< execution_space
-                                       , typename FunctorType::value_type
-                                       , FunctorType > ;
-
-      return m_queue->allocate_block_size( sizeof(task_type) );
-    }
-
-  /**\brief  Allocation size for a when_all aggregate */
-  KOKKOS_FUNCTION
-  size_t when_all_allocation_size( int narg ) const
-    {
-      using task_base  = Kokkos::Impl::TaskBase< ExecSpace , void , void > ;
-
-      return m_queue->allocate_block_size( sizeof(task_base) + narg * sizeof(task_base*) );
-    }
-
-  //----------------------------------------
-
-  /**\brief  A task spawns a task with options
-   *
-   *  1) High, Normal, or Low priority
-   *  2) With or without dependence
-   *  3) Team or Serial
-   */
-  template< typename FunctorType , typename ... Options >
-  KOKKOS_FUNCTION
-  Future< typename FunctorType::value_type , ExecSpace >
-  task_spawn( FunctorType const & arg_functor 
-            , Options const & ... arg_options
-            ) const
-    {
-      using value_type  = typename FunctorType::value_type ;
-      using future_type = Future< value_type , execution_space > ;
-      using task_type   = Impl::TaskBase< execution_space
-                                        , value_type
-                                        , FunctorType > ;
-
-      //----------------------------------------
-      // Give single-thread back-ends an opportunity to clear
-      // queue of ready tasks before allocating a new task
-
-      m_queue->iff_single_thread_recursive_execute();
-
-      //----------------------------------------
-
-      future_type f ;
-
-      // Allocate task from memory pool
-      f.m_task =
-        reinterpret_cast< task_type * >(m_queue->allocate(sizeof(task_type)));
-
-      if ( f.m_task ) {
-
-        // Placement new construction
-        new ( f.m_task ) task_type( arg_functor );
-
-        // Reference count starts at two
-        // +1 for matching decrement when task is complete
-        // +1 for future
-        f.m_task->m_queue      = m_queue ;
-        f.m_task->m_ref_count  = 2 ;
-        f.m_task->m_alloc_size = sizeof(task_type);
-
-        assign( f.m_task , arg_options... );
-
-        // Spawning from within the execution space so the
-        // apply function pointer is guaranteed to be valid
-        f.m_task->m_apply = task_type::apply ;
-
-        m_queue->schedule( f.m_task );
-        // this task may be updated or executed at any moment
-      }
-
-      return f ;
-    }
-
-  /**\brief  The host process spawns a task with options
-   *
-   *  1) High, Normal, or Low priority
-   *  2) With or without dependence
-   *  3) Team or Serial
-   */
-  template< typename FunctorType , typename ... Options >
-  inline
-  Future< typename FunctorType::value_type , ExecSpace >
-  host_spawn( FunctorType const & arg_functor 
-            , Options const & ... arg_options
-            ) const
-    {
-      using value_type  = typename FunctorType::value_type ;
-      using future_type = Future< value_type , execution_space > ;
-      using task_type   = Impl::TaskBase< execution_space
-                                        , value_type
-                                        , FunctorType > ;
-
-      future_type f ;
-
-      // Allocate task from memory pool
-      f.m_task = 
-        reinterpret_cast<task_type*>( m_queue->allocate(sizeof(task_type)) );
-
-      if ( f.m_task ) {
-
-        // Placement new construction
-        new( f.m_task ) task_type( arg_functor );
-
-        // Reference count starts at two:
-        // +1 to match decrement when task completes
-        // +1 for the future
-        f.m_task->m_queue      = m_queue ;
-        f.m_task->m_ref_count  = 2 ;
-        f.m_task->m_alloc_size = sizeof(task_type);
-
-        assign( f.m_task , arg_options... );
-
-        // Potentially spawning outside execution space so the
-        // apply function pointer must be obtained from execution space.
-        // Required for Cuda execution space function pointer.
-        queue_type::specialization::template
-          proc_set_apply< FunctorType >( & f.m_task->m_apply );
-
-        m_queue->schedule( f.m_task );
-      }
-      return f ;
-    }
-
-  /**\brief  Return a future that is complete
-   *         when all input futures are complete.
-   */
-  template< typename A1 , typename A2 >
-  KOKKOS_FUNCTION
-  Future< ExecSpace >
-  when_all( int narg , Future< A1 , A2 > const * const arg ) const
-    {
-      static_assert
-        ( std::is_same< execution_space
-                      , typename Future< A1 , A2 >::execution_space
-                      >::value
-        , "Future must have same execution space" );
-
-      using future_type = Future< ExecSpace > ;
-      using task_base   = Kokkos::Impl::TaskBase< ExecSpace , void , void > ;
-
-      future_type f ;
-
-      size_t const size  = sizeof(task_base) + narg * sizeof(task_base*);
-
-      f.m_task =
-        reinterpret_cast< task_base * >( m_queue->allocate( size ) );
-
-      if ( f.m_task ) {
-
-        new( f.m_task ) task_base();
-
-        // Reference count starts at two:
-        // +1 to match decrement when task completes
-        // +1 for the future
-        f.m_task->m_queue      = m_queue ;
-        f.m_task->m_ref_count  = 2 ;
-        f.m_task->m_alloc_size = size ;
-        f.m_task->m_dep_count  = narg ;
-        f.m_task->m_task_type  = task_base::Aggregate ;
-
-        task_base ** const dep = f.m_task->aggregate_dependences();
-
-        // Assign dependences to increment their reference count
-        // The futures may be destroyed upon returning from this call
-        // so increment reference count to track this assignment.
-
-        for ( int i = 0 ; i < narg ; ++i ) {
-          task_base * const t = dep[i] = arg[i].m_task ;
-          if ( 0 != t ) {
-            Kokkos::atomic_fetch_add( &(t->m_ref_count) , 1 );
-          }
-        }
-
-        m_queue->schedule( f.m_task );
-        // this when_all may be processed at any moment
-      }
-
-      return f ;
-    }
-
-  /**\brief  An executing task respawns itself with options
-   *
-   *  1) High, Normal, or Low priority
-   *  2) With or without dependence
-   */
-  template< class FunctorType , typename ... Options >
-  KOKKOS_FUNCTION
-  void respawn( FunctorType * task_self
-              , Options const & ... arg_options ) const
-    {
-      using value_type  = typename FunctorType::value_type ;
-      using task_type   = Impl::TaskBase< execution_space
-                                        , value_type
-                                        , FunctorType > ;
-
-      task_base * const zero = (task_base *) 0 ;
-      task_base * const lock = (task_base *) task_base::LockTag ;
-      task_type * const task = static_cast< task_type * >( task_self );
-
-      // Precondition:
-      //   task is in Executing state
-      //   therefore  m_next == LockTag
-      //
-      // Change to m_next == 0 for no dependence
-
-      if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
-        Kokkos::abort("TaskPolicy::respawn ERROR: already respawned");
-      }
-
-      assign( task , arg_options... );
-
-      // Postcondition:
-      //   task is in Executing-Respawn state
-      //   therefore  m_next == dependece or 0
-    }
-
-  //----------------------------------------
-
-  template< typename S >
-  friend
-  void Kokkos::wait( Kokkos::TaskPolicy< S > const & );
-
-  //----------------------------------------
-
-  inline
-  int allocation_capacity() const noexcept
-    { return m_queue->m_memory.get_mem_size(); }
-
-  KOKKOS_INLINE_FUNCTION
-  int allocated_task_count() const noexcept
-    { return m_queue->m_count_alloc ; }
-
-  KOKKOS_INLINE_FUNCTION
-  int allocated_task_count_max() const noexcept
-    { return m_queue->m_max_alloc ; }
-
-  KOKKOS_INLINE_FUNCTION
-  long allocated_task_count_accum() const noexcept
-    { return m_queue->m_accum_alloc ; }
-
-};
-
-template< typename ExecSpace >
-inline
-void wait( TaskPolicy< ExecSpace > const & policy )
-{ policy.m_queue->execute(); }
-
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-struct FutureValueTypeIsVoidError {};
-
-template < class ExecSpace , class ResultType , class FunctorType >
-class TaskMember ;
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-
-/**\brief  States of a task */
-enum TaskState
-  { TASK_STATE_NULL         = 0  ///<  Does not exist
-  , TASK_STATE_CONSTRUCTING = 1  ///<  Is under construction
-  , TASK_STATE_WAITING      = 2  ///<  Is waiting for execution
-  , TASK_STATE_EXECUTING    = 4  ///<  Is executing
-  , TASK_STATE_COMPLETE     = 8  ///<  Execution is complete
-  };
-
-/**\brief  Tag for Future<Latch,Space>
- */
-struct Latch {};
-
-/**
- *
- *  Future< space >  // value_type == void
- *  Future< value >  // space == Default
- *  Future< value , space >
- *
- */
-template< class Arg1 = void , class Arg2 = void >
-class Future {
-private:
-
-  template< class , class , class > friend class Impl::TaskMember ;
-  template< class > friend class TaskPolicy ;
-  template< class , class > friend class Future ;
-
-  // Argument #2, if not void, must be the space.
-  enum { Arg1_is_space  = Kokkos::Impl::is_execution_space< Arg1 >::value };
-  enum { Arg2_is_space  = Kokkos::Impl::is_execution_space< Arg2 >::value };
-  enum { Arg2_is_void   = std::is_same< Arg2 , void >::value };
-
-  struct ErrorNoExecutionSpace {};
-
-  enum { Opt1  =   Arg1_is_space && Arg2_is_void
-       , Opt2  = ! Arg1_is_space && Arg2_is_void
-       , Opt3  = ! Arg1_is_space && Arg2_is_space
-       , OptOK = Kokkos::Impl::StaticAssert< Opt1 || Opt2 || Opt3 , ErrorNoExecutionSpace >::value
-       };
-
-  typedef typename
-    Kokkos::Impl::if_c< Opt2 || Opt3 , Arg1 , void >::type
-      ValueType ;
-
-  typedef typename
-    Kokkos::Impl::if_c< Opt1 , Arg1 , typename
-    Kokkos::Impl::if_c< Opt2 , Kokkos::DefaultExecutionSpace , typename
-    Kokkos::Impl::if_c< Opt3 , Arg2 , void
-    >::type >::type >::type
-      ExecutionSpace ;
-
-  typedef Impl::TaskMember< ExecutionSpace , void , void >       TaskRoot ;
-  typedef Impl::TaskMember< ExecutionSpace , ValueType , void >  TaskValue ;
-
-  TaskRoot * m_task ;
-
-  KOKKOS_INLINE_FUNCTION explicit
-  Future( TaskRoot * task )
-    : m_task(0)
-    { TaskRoot::assign( & m_task , TaskRoot::template verify_type< ValueType >( task ) ); }
-
-  //----------------------------------------
-
-public:
-
-  typedef ValueType       value_type;
-  typedef ExecutionSpace  execution_space ;
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  TaskState get_task_state() const
-    { return 0 != m_task ? m_task->get_state() : TASK_STATE_NULL ; }
-
-  KOKKOS_INLINE_FUNCTION
-  bool is_null() const { return 0 == m_task ; }
-
-  KOKKOS_INLINE_FUNCTION
-  int reference_count() const
-    { return 0 != m_task ? m_task->reference_count() : 0 ; }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  ~Future() { TaskRoot::assign( & m_task , 0 ); }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  Future() : m_task(0) {}
-
-  KOKKOS_INLINE_FUNCTION
-  Future( const Future & rhs )
-    : m_task(0)
-    { TaskRoot::assign( & m_task , rhs.m_task ); }
-
-  KOKKOS_INLINE_FUNCTION
-  Future & operator = ( const Future & rhs )
-    { TaskRoot::assign( & m_task , rhs.m_task ); return *this ; }
-
-  //----------------------------------------
-
-  template< class A1 , class A2 >
-  KOKKOS_INLINE_FUNCTION
-  Future( const Future<A1,A2> & rhs )
-    : m_task(0)
-    { TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); }
-
-  template< class A1 , class A2 >
-  KOKKOS_INLINE_FUNCTION
-  Future & operator = ( const Future<A1,A2> & rhs )
-    { TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); return *this ; }
-
-  //----------------------------------------
-
-  typedef typename TaskValue::get_result_type get_result_type ;
-
-  KOKKOS_INLINE_FUNCTION
-  get_result_type get() const
-    {
-      if ( 0 == m_task ) {
-        Kokkos::abort( "Kokkos::Experimental::Future::get ERROR: is_null()");
-      }
-      return static_cast<TaskValue*>( m_task )->get();  
-    }
-
-  //----------------------------------------
-};
-
-template< class Arg2 >
-class Future< Latch , Arg2 > {
-private:
-
-  template< class , class , class > friend class Impl::TaskMember ;
-  template< class > friend class TaskPolicy ;
-  template< class , class > friend class Future ;
-
-  // Argument #2, if not void, must be the space.
-  enum { Arg2_is_space  = Kokkos::Impl::is_execution_space< Arg2 >::value };
-  enum { Arg2_is_void   = std::is_same< Arg2 , void >::value };
-
-  static_assert( Arg2_is_space || Arg2_is_void 
-               , "Future template argument #2 must be a space" );
-
-  typedef typename
-    std::conditional< Arg2_is_space , Arg2 , Kokkos::DefaultExecutionSpace >
-     ::type ExecutionSpace ;
-
-  typedef Impl::TaskMember< ExecutionSpace , void , void >  TaskRoot ;
-
-  TaskRoot * m_task ;
-
-  KOKKOS_INLINE_FUNCTION explicit
-  Future( TaskRoot * task )
-    : m_task(0)
-    { TaskRoot::assign( & m_task , task ); }
-
-  //----------------------------------------
-
-public:
-
-  typedef void            value_type;
-  typedef ExecutionSpace  execution_space ;
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  void add( const int k ) const
-    { if ( 0 != m_task ) m_task->latch_add(k); }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  TaskState get_task_state() const
-    { return 0 != m_task ? m_task->get_state() : TASK_STATE_NULL ; }
-
-  KOKKOS_INLINE_FUNCTION
-  bool is_null() const { return 0 == m_task ; }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  ~Future() { TaskRoot::assign( & m_task , 0 ); }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  Future() : m_task(0) {}
-
-  KOKKOS_INLINE_FUNCTION
-  Future( const Future & rhs )
-    : m_task(0)
-    { TaskRoot::assign( & m_task , rhs.m_task ); }
-
-  KOKKOS_INLINE_FUNCTION
-  Future & operator = ( const Future & rhs )
-    { TaskRoot::assign( & m_task , rhs.m_task ); return *this ; }
-
-  //----------------------------------------
-
-  typedef void get_result_type ;
-
-  KOKKOS_INLINE_FUNCTION
-  void get() const {}
-
-  //----------------------------------------
-
-};
-
-namespace Impl {
-
-template< class T >
-struct is_future : public std::false_type {};
-
-template< class Arg0 , class Arg1 >
-struct is_future< Kokkos::Experimental::Future<Arg0,Arg1> >
-  : public std::true_type {};
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-
-/** \brief  If the argument is an execution space then a serial task in that space */
-template< class Arg0 = Kokkos::DefaultExecutionSpace >
-class TaskPolicy {
-public:
-
-  typedef typename Arg0::execution_space  execution_space ;
-
-  //----------------------------------------
-
-  TaskPolicy
-    ( const unsigned arg_task_max_count
-    , const unsigned arg_task_max_size
-    , const unsigned arg_task_default_dependence_capacity = 4
-    , const unsigned arg_task_team_size = 0 /* choose default */
-    );
-
-  TaskPolicy() = default ;
-  TaskPolicy( TaskPolicy && rhs ) = default ;
-  TaskPolicy( const TaskPolicy & rhs ) = default ;
-  TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
-  TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ;
-
-  //----------------------------------------
-  /** \brief  Create a serial task with storage for dependences.
-   *
-   *  Postcondition: Task is in the 'constructing' state.
-   */
-  template< class FunctorType >
-  Future< typename FunctorType::value_type , execution_space >
-  create( const FunctorType & functor
-        , const unsigned      dependence_capacity /* = default */ );
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< typename FunctorType::value_type , execution_space >
-  create_team( const FunctorType & functor
-             , const unsigned dependence_capacity /* = default */ );
-
-  /** \brief  Set dependence that 'after' cannot start execution
-   *          until 'before' has completed.
-   *
-   *  Precondition: The 'after' task must be in then 'Constructing' state.
-   */
-  template< class TA , class TB >
-  void add_dependence( const Future<TA,execution_space> & after
-                     , const Future<TB,execution_space> & before ) const ;
-
-  /** \brief  Spawn a task in the 'Constructing' state
-   *
-   *  Precondition:  Task is in the 'constructing' state.
-   *  Postcondition: Task is waiting, executing, or complete.
-   */
-  template< class T >
-  const Future<T,execution_space> &
-  spawn( const Future<T,execution_space> & ) const ;
-
-  //----------------------------------------
-  /** \brief  Query dependence of an executing task */
-
-  template< class FunctorType >
-  Future< execution_space >
-  get_dependence( FunctorType * , const int ) const ;
-
-  //----------------------------------------
-  /** \brief  Clear current dependences of an executing task
-   *          in preparation for setting new dependences and
-   *          respawning.
-   *
-   * Precondition: The functor must be a task in the executing state.
-   */
-  template< class FunctorType >
-  void clear_dependence( FunctorType * ) const ;
-
-  /** \brief  Set dependence that 'after' cannot resume execution
-   *          until 'before' has completed.
-   *
-   *  The 'after' functor must be in the executing state
-   */
-  template< class FunctorType , class TB >
-  void add_dependence( FunctorType * after
-                     , const Future<TB,execution_space> & before ) const ;
-
-  /** \brief  Respawn (reschedule) an executing task to be called again
-   *          after all dependences have completed.
-   */
-  template< class FunctorType >
-  void respawn( FunctorType * ) const ;
-};
-
-//----------------------------------------------------------------------------
-/** \brief  Create and spawn a single-thread task */
-template< class ExecSpace , class FunctorType >
-inline
-Future< typename FunctorType::value_type , ExecSpace >
-spawn( TaskPolicy<ExecSpace> & policy , const FunctorType & functor )
-{ return policy.spawn( policy.create( functor ) ); }
-
-/** \brief  Create and spawn a single-thread task with dependences */
-template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 >
-inline
-Future< typename FunctorType::value_type , ExecSpace >
-spawn( TaskPolicy<ExecSpace>   & policy
-     , const FunctorType       & functor
-     , const Future<Arg0,Arg1> & before_0
-     , const Future<Arg0,Arg1> & before_1 )
-{
-  Future< typename FunctorType::value_type , ExecSpace > f ;
-  f = policy.create( functor , 2 );
-  policy.add_dependence( f , before_0 );
-  policy.add_dependence( f , before_1 );
-  policy.spawn( f );
-  return f ;
-}
-
-//----------------------------------------------------------------------------
-/** \brief  Create and spawn a parallel_for task */
-template< class ExecSpace , class ParallelPolicyType , class FunctorType >
-inline
-Future< typename FunctorType::value_type , ExecSpace >
-spawn_foreach( TaskPolicy<ExecSpace>     & task_policy
-             , const ParallelPolicyType  & parallel_policy
-             , const FunctorType         & functor )
-{ return task_policy.spawn( task_policy.create_foreach( parallel_policy , functor ) ); }
-
-/** \brief  Create and spawn a parallel_reduce task */
-template< class ExecSpace , class ParallelPolicyType , class FunctorType >
-inline
-Future< typename FunctorType::value_type , ExecSpace >
-spawn_reduce( TaskPolicy<ExecSpace>     & task_policy
-            , const ParallelPolicyType  & parallel_policy
-            , const FunctorType         & functor )
-{ return task_policy.spawn( task_policy.create_reduce( parallel_policy , functor ) ); }
-
-//----------------------------------------------------------------------------
-/** \brief  Respawn a task functor with dependences */
-template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 >
-inline
-void respawn( TaskPolicy<ExecSpace>   & policy
-            , FunctorType *             functor
-            , const Future<Arg0,Arg1> & before_0
-            , const Future<Arg0,Arg1> & before_1
-            )
-{
-  policy.clear_dependence( functor );
-  policy.add_dependence( functor , before_0 );
-  policy.add_dependence( functor , before_1 );
-  policy.respawn( functor );
-}
-
-//----------------------------------------------------------------------------
-
-template< class ExecSpace >
-void wait( TaskPolicy< ExecSpace > & );
-
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
-#endif /* #ifndef KOKKOS_TASKPOLICY_HPP */
+#include <Kokkos_TaskScheduler.hpp>
 
diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0de926aa12e481a7ccc797e26783b35dd9ddb029
--- /dev/null
+++ b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp
@@ -0,0 +1,700 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_TASKSCHEDULER_HPP
+#define KOKKOS_TASKSCHEDULER_HPP
+
+//----------------------------------------------------------------------------
+
+#include <Kokkos_Core_fwd.hpp>
+
+// If compiling with CUDA then must be using CUDA 8 or better
+// and use relocatable device code to enable the task policy.
+// nvcc relocatable device code option: --relocatable-device-code=true
+
+#if ( defined( KOKKOS_HAVE_CUDA ) )
+  #if ( 8000 <= CUDA_VERSION ) && \
+      defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE )
+
+  #define KOKKOS_ENABLE_TASKDAG
+
+  #endif
+#else
+  #define KOKKOS_ENABLE_TASKDAG
+#endif
+
+
+#if defined( KOKKOS_ENABLE_TASKDAG )
+
+//----------------------------------------------------------------------------
+
+#include <Kokkos_MemoryPool.hpp>
+#include <impl/Kokkos_Tags.hpp>
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+// Forward declarations used in Impl::TaskQueue
+
+template< typename Arg1 = void , typename Arg2 = void >
+class Future ;
+
+template< typename Space >
+class TaskScheduler ;
+
+} // namespace Kokkos
+
+#include <impl/Kokkos_TaskQueue.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+/**
+ *
+ *  Future< space >  // value_type == void
+ *  Future< value >  // space == Default
+ *  Future< value , space >
+ *
+ */
+template< typename Arg1 , typename Arg2 >
+class Future {
+private:
+
+  template< typename > friend class TaskScheduler ;
+  template< typename , typename > friend class Future ;
+  template< typename , typename , typename > friend class Impl::TaskBase ;
+
+  enum { Arg1_is_space  = Kokkos::is_space< Arg1 >::value };
+  enum { Arg2_is_space  = Kokkos::is_space< Arg2 >::value };
+  enum { Arg1_is_value  = ! Arg1_is_space &&
+                          ! std::is_same< Arg1 , void >::value };
+  enum { Arg2_is_value  = ! Arg2_is_space &&
+                          ! std::is_same< Arg2 , void >::value };
+
+  static_assert( ! ( Arg1_is_space && Arg2_is_space )
+               , "Future cannot be given two spaces" );
+
+  static_assert( ! ( Arg1_is_value && Arg2_is_value )
+               , "Future cannot be given two value types" );
+
+  using ValueType =
+    typename std::conditional< Arg1_is_value , Arg1 ,
+    typename std::conditional< Arg2_is_value , Arg2 , void
+    >::type >::type ;
+
+  using Space =
+    typename std::conditional< Arg1_is_space , Arg1 ,
+    typename std::conditional< Arg2_is_space , Arg2 , void
+    >::type >::type ;
+
+  using task_base  = Impl::TaskBase< Space , ValueType , void > ;
+  using queue_type = Impl::TaskQueue< Space > ;
+
+  task_base * m_task ;
+
+  KOKKOS_INLINE_FUNCTION explicit
+  Future( task_base * task ) : m_task(0)
+    { if ( task ) queue_type::assign( & m_task , task ); }
+
+  //----------------------------------------
+
+public:
+
+  using execution_space = typename Space::execution_space ;
+  using value_type      = ValueType ;
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION
+  bool is_null() const { return 0 == m_task ; }
+
+  KOKKOS_INLINE_FUNCTION
+  int reference_count() const
+    { return 0 != m_task ? m_task->reference_count() : 0 ; }
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION
+  void clear()
+    { if ( m_task ) queue_type::assign( & m_task , (task_base*)0 ); }
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION
+  ~Future() { clear(); }
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr Future() noexcept : m_task(0) {}
+
+  KOKKOS_INLINE_FUNCTION
+  Future( Future && rhs )
+    : m_task( rhs.m_task ) { rhs.m_task = 0 ; }
+
+  KOKKOS_INLINE_FUNCTION
+  Future( const Future & rhs )
+    : m_task(0)
+    { if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); }
+
+  KOKKOS_INLINE_FUNCTION
+  Future & operator = ( Future && rhs )
+    {
+      clear();
+      m_task = rhs.m_task ;
+      rhs.m_task = 0 ;
+      return *this ;
+    }
+
+  KOKKOS_INLINE_FUNCTION
+  Future & operator = ( const Future & rhs )
+    {
+      if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task );
+      return *this ;
+    }
+
+  //----------------------------------------
+
+  template< class A1 , class A2 >
+  KOKKOS_INLINE_FUNCTION
+  Future( Future<A1,A2> && rhs )
+    : m_task( rhs.m_task )
+    {
+      static_assert
+        ( std::is_same< Space , void >::value ||
+          std::is_same< Space , typename Future<A1,A2>::Space >::value
+        , "Assigned Futures must have the same space" );
+
+      static_assert
+        ( std::is_same< value_type , void >::value ||
+          std::is_same< value_type , typename Future<A1,A2>::value_type >::value
+        , "Assigned Futures must have the same value_type" );
+
+      rhs.m_task = 0 ;
+    }
+
+  template< class A1 , class A2 >
+  KOKKOS_INLINE_FUNCTION
+  Future( const Future<A1,A2> & rhs )
+    : m_task(0)
+    {
+      static_assert
+        ( std::is_same< Space , void >::value ||
+          std::is_same< Space , typename Future<A1,A2>::Space >::value
+        , "Assigned Futures must have the same space" );
+
+      static_assert
+        ( std::is_same< value_type , void >::value ||
+          std::is_same< value_type , typename Future<A1,A2>::value_type >::value
+        , "Assigned Futures must have the same value_type" );
+
+      if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task );
+    }
+
+  template< class A1 , class A2 >
+  KOKKOS_INLINE_FUNCTION
+  Future & operator = ( const Future<A1,A2> & rhs )
+    {
+      static_assert
+        ( std::is_same< Space , void >::value ||
+          std::is_same< Space , typename Future<A1,A2>::Space >::value
+        , "Assigned Futures must have the same space" );
+
+      static_assert
+        ( std::is_same< value_type , void >::value ||
+          std::is_same< value_type , typename Future<A1,A2>::value_type >::value
+        , "Assigned Futures must have the same value_type" );
+
+      if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task );
+      return *this ;
+    }
+
+  template< class A1 , class A2 >
+  KOKKOS_INLINE_FUNCTION
+  Future & operator = ( Future<A1,A2> && rhs )
+    {
+      static_assert
+        ( std::is_same< Space , void >::value ||
+          std::is_same< Space , typename Future<A1,A2>::Space >::value
+        , "Assigned Futures must have the same space" );
+
+      static_assert
+        ( std::is_same< value_type , void >::value ||
+          std::is_same< value_type , typename Future<A1,A2>::value_type >::value
+        , "Assigned Futures must have the same value_type" );
+
+      clear();
+      m_task = rhs.m_task ;
+      rhs.m_task = 0 ;
+      return *this ;
+    }
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION
+  typename task_base::get_return_type
+  get() const  // Result of the referenced task, forwarded from task_base::get()
+    {
+      if ( 0 == m_task ) {  // a null Future references no task: abort instead of dereferencing
+        Kokkos::abort( "Kokkos::Future::get ERROR: is_null()");
+      }
+      return m_task->get();
+    }
+};
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+enum TaskType { TaskTeam   = Impl::TaskBase<void,void,void>::TaskTeam
+              , TaskSingle = Impl::TaskBase<void,void,void>::TaskSingle };
+
+enum TaskPriority { TaskHighPriority    = 0
+                  , TaskRegularPriority = 1
+                  , TaskLowPriority     = 2 };
+
+template< typename Space >
+void wait( TaskScheduler< Space > const & );
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+template< typename ExecSpace >
+class TaskScheduler
+{
+private:
+
+  using track_type = Kokkos::Impl::SharedAllocationTracker ;
+  using queue_type = Kokkos::Impl::TaskQueue< ExecSpace > ;
+  using task_base  = Impl::TaskBase< ExecSpace , void , void > ;
+
+  track_type   m_track ;
+  queue_type * m_queue ;
+
+  //----------------------------------------
+  // Process optional arguments to spawn and respawn functions
+
+  KOKKOS_INLINE_FUNCTION static
+  void assign( task_base * const ) {}
+
+  // TaskTeam or TaskSingle
+  template< typename ... Options >
+  KOKKOS_INLINE_FUNCTION static
+  void assign( task_base * const task
+             , TaskType const & arg
+             , Options const & ... opts )
+    {
+      task->m_task_type = arg ;
+      assign( task , opts ... );
+    }
+
+  // TaskHighPriority or TaskRegularPriority or TaskLowPriority
+  template< typename ... Options >
+  KOKKOS_INLINE_FUNCTION static
+  void assign( task_base * const task
+             , TaskPriority const & arg
+             , Options const & ... opts )
+    {
+      task->m_priority = arg ;
+      assign( task , opts ... );
+    }
+
+  // Future for a dependence
+  template< typename A1 , typename A2 , typename ... Options >
+  KOKKOS_INLINE_FUNCTION static
+  void assign( task_base * const task
+             , Future< A1 , A2 > const & arg 
+             , Options const & ... opts )
+    {
+      // Assign dependence to task->m_next
+      // which will be processed within subsequent call to schedule.
+      // Error if the dependence is reset.
+
+      if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) {
+        Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
+      }
+
+      if ( 0 != arg.m_task ) {
+        // The future may be destroyed upon returning from this call
+        // so increment reference count to track this assignment.
+	Kokkos::atomic_increment( &(arg.m_task->m_ref_count) );
+      }
+
+      assign( task , opts ... );
+    }
+
+  //----------------------------------------
+
+public:
+
+  using execution_policy = TaskScheduler ;
+  using execution_space  = ExecSpace ;
+  using memory_space     = typename queue_type::memory_space ;
+  using member_type      = Kokkos::Impl::TaskExec< ExecSpace > ;
+
+  KOKKOS_INLINE_FUNCTION
+  TaskScheduler() : m_track(), m_queue(0) {}
+
+  KOKKOS_INLINE_FUNCTION
+  TaskScheduler( TaskScheduler && rhs ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  TaskScheduler( TaskScheduler const & rhs ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  TaskScheduler & operator = ( TaskScheduler && rhs ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  TaskScheduler & operator = ( TaskScheduler const & rhs ) = default ;
+
+  TaskScheduler( memory_space const & arg_memory_space
+               , unsigned const arg_memory_pool_capacity
+               , unsigned const arg_memory_pool_log2_superblock = 12 )
+    : m_track()
+    , m_queue(0)
+    {
+      typedef Kokkos::Impl::SharedAllocationRecord
+        < memory_space , typename queue_type::Destroy >
+          record_type ;
+
+      record_type * record =
+        record_type::allocate( arg_memory_space
+                             , "TaskQueue"
+                             , sizeof(queue_type)
+                             );
+
+      m_queue = new( record->data() )
+        queue_type( arg_memory_space
+                  , arg_memory_pool_capacity
+                  , arg_memory_pool_log2_superblock );
+
+      record->m_destroy.m_queue = m_queue ;
+
+      m_track.assign_allocated_record_to_uninitialized( record );
+    }
+
+  //----------------------------------------
+  /**\brief  Allocation size for a spawned task */
+  template< typename FunctorType >
+  KOKKOS_FUNCTION
+  size_t spawn_allocation_size() const
+    {
+      using task_type  = Impl::TaskBase< execution_space
+                                       , typename FunctorType::value_type
+                                       , FunctorType > ;
+
+      return m_queue->allocate_block_size( sizeof(task_type) );
+    }
+
+  /**\brief  Allocation size for a when_all aggregate */
+  KOKKOS_FUNCTION
+  size_t when_all_allocation_size( int narg ) const
+    {
+      using task_base  = Kokkos::Impl::TaskBase< ExecSpace , void , void > ;
+
+      return m_queue->allocate_block_size( sizeof(task_base) + narg * sizeof(task_base*) );
+    }
+
+  //----------------------------------------
+
+  /**\brief  A task spawns a task with options
+   *
+   *  1) High, Regular, or Low priority
+   *  2) With or without dependence
+   *  3) Team or Serial
+   */
+  template< typename FunctorType , typename ... Options >
+  KOKKOS_FUNCTION
+  Future< typename FunctorType::value_type , ExecSpace >
+  task_spawn( FunctorType const & arg_functor 
+            , Options const & ... arg_options
+            ) const
+    {
+      using value_type  = typename FunctorType::value_type ;
+      using future_type = Future< value_type , execution_space > ;
+      using task_type   = Impl::TaskBase< execution_space
+                                        , value_type
+                                        , FunctorType > ;
+
+      //----------------------------------------
+      // Give single-thread back-ends an opportunity to clear
+      // queue of ready tasks before allocating a new task
+
+      m_queue->iff_single_thread_recursive_execute();
+
+      //----------------------------------------
+
+      future_type f ;
+
+      // Allocate task from memory pool
+      f.m_task =
+        reinterpret_cast< task_type * >(m_queue->allocate(sizeof(task_type)));
+
+      if ( f.m_task ) {
+
+        // Placement new construction
+        new ( f.m_task ) task_type( arg_functor );
+
+        // Reference count starts at two
+        // +1 for matching decrement when task is complete
+        // +1 for future
+        f.m_task->m_queue      = m_queue ;
+        f.m_task->m_ref_count  = 2 ;
+        f.m_task->m_alloc_size = sizeof(task_type);
+
+        assign( f.m_task , arg_options... );
+
+        // Spawning from within the execution space so the
+        // apply function pointer is guaranteed to be valid
+        f.m_task->m_apply = task_type::apply ;
+
+        m_queue->schedule( f.m_task );
+        // this task may be updated or executed at any moment
+      }
+
+      return f ;
+    }
+
+  /**\brief  The host process spawns a task with options
+   *
+   *  1) High, Regular, or Low priority
+   *  2) With or without dependence
+   *  3) Team or Serial
+   */
+  template< typename FunctorType , typename ... Options >
+  inline
+  Future< typename FunctorType::value_type , ExecSpace >
+  host_spawn( FunctorType const & arg_functor 
+            , Options const & ... arg_options
+            ) const
+    {
+      using value_type  = typename FunctorType::value_type ;
+      using future_type = Future< value_type , execution_space > ;
+      using task_type   = Impl::TaskBase< execution_space
+                                        , value_type
+                                        , FunctorType > ;
+
+      if ( m_queue == 0 ) {
+        Kokkos::abort("Kokkos::TaskScheduler not initialized");
+      }
+
+      future_type f ;
+
+      // Allocate task from memory pool
+      f.m_task = 
+        reinterpret_cast<task_type*>( m_queue->allocate(sizeof(task_type)) );
+
+      if ( f.m_task ) {
+
+        // Placement new construction
+        new( f.m_task ) task_type( arg_functor );
+
+        // Reference count starts at two:
+        // +1 to match decrement when task completes
+        // +1 for the future
+        f.m_task->m_queue      = m_queue ;
+        f.m_task->m_ref_count  = 2 ;
+        f.m_task->m_alloc_size = sizeof(task_type);
+
+        assign( f.m_task , arg_options... );
+
+        // Potentially spawning outside execution space so the
+        // apply function pointer must be obtained from execution space.
+        // Required for Cuda execution space function pointer.
+        queue_type::specialization::template
+          proc_set_apply< FunctorType >( & f.m_task->m_apply );
+
+        m_queue->schedule( f.m_task );
+      }
+      return f ;
+    }
+
+  /**\brief  Return a future that is complete
+   *         when all input futures are complete.
+   */
+  template< typename A1 , typename A2 >
+  KOKKOS_FUNCTION
+  Future< ExecSpace >
+  when_all( int narg , Future< A1 , A2 > const * const arg ) const
+    {
+      static_assert
+        ( std::is_same< execution_space
+                      , typename Future< A1 , A2 >::execution_space
+                      >::value
+        , "Future must have same execution space" );
+
+      using future_type = Future< ExecSpace > ;
+      using task_base   = Kokkos::Impl::TaskBase< ExecSpace , void , void > ;
+
+      future_type f ;
+
+      size_t const size  = sizeof(task_base) + narg * sizeof(task_base*);
+
+      f.m_task =
+        reinterpret_cast< task_base * >( m_queue->allocate( size ) );
+
+      if ( f.m_task ) {
+
+        new( f.m_task ) task_base();
+
+        // Reference count starts at two:
+        // +1 to match decrement when task completes
+        // +1 for the future
+        f.m_task->m_queue      = m_queue ;
+        f.m_task->m_ref_count  = 2 ;
+        f.m_task->m_alloc_size = size ;
+        f.m_task->m_dep_count  = narg ;
+        f.m_task->m_task_type  = task_base::Aggregate ;
+
+        task_base ** const dep = f.m_task->aggregate_dependences();
+
+        // Assign dependences to increment their reference count
+        // The futures may be destroyed upon returning from this call
+        // so increment reference count to track this assignment.
+
+        for ( int i = 0 ; i < narg ; ++i ) {
+          task_base * const t = dep[i] = arg[i].m_task ;
+          if ( 0 != t ) {
+	    Kokkos::atomic_increment( &(t->m_ref_count) );
+          }
+        }
+
+        m_queue->schedule( f.m_task );
+        // this when_all may be processed at any moment
+      }
+
+      return f ;
+    }
+
+  /**\brief  An executing task respawns itself with options
+   *
+   *  1) High, Regular, or Low priority
+   *  2) With or without dependence
+   */
+  template< class FunctorType , typename ... Options >
+  KOKKOS_FUNCTION
+  void respawn( FunctorType * task_self
+              , Options const & ... arg_options ) const
+    {
+      using value_type  = typename FunctorType::value_type ;
+      using task_type   = Impl::TaskBase< execution_space
+                                        , value_type
+                                        , FunctorType > ;
+
+      task_base * const zero = (task_base *) 0 ;
+      task_base * const lock = (task_base *) task_base::LockTag ;
+      task_type * const task = static_cast< task_type * >( task_self );
+
+      // Precondition:
+      //   task is in Executing state
+      //   therefore  m_next == LockTag
+      //
+      // Change to m_next == 0 for no dependence
+
+      if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
+        Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
+      }
+
+      assign( task , arg_options... );
+
+      // Postcondition:
+      //   task is in Executing-Respawn state
+      //   therefore  m_next == dependence or 0
+    }
+
+  //----------------------------------------
+
+  template< typename S >
+  friend
+  void Kokkos::wait( Kokkos::TaskScheduler< S > const & );
+
+  //----------------------------------------
+
+  inline
+  int allocation_capacity() const noexcept
+    { return m_queue->m_memory.get_mem_size(); }
+
+  KOKKOS_INLINE_FUNCTION
+  int allocated_task_count() const noexcept
+    { return m_queue->m_count_alloc ; }
+
+  KOKKOS_INLINE_FUNCTION
+  int allocated_task_count_max() const noexcept
+    { return m_queue->m_max_alloc ; }
+
+  KOKKOS_INLINE_FUNCTION
+  long allocated_task_count_accum() const noexcept
+    { return m_queue->m_accum_alloc ; }
+
+};
+
+template< typename ExecSpace >
+inline
+void wait( TaskScheduler< ExecSpace > const & policy )  // Runs execute() on the scheduler's task queue.
+{ policy.m_queue->execute(); }  // NOTE(review): m_queue is not checked; a default-constructed TaskScheduler holds m_queue == 0
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
+#endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */
+
diff --git a/lib/kokkos/core/src/Kokkos_Threads.hpp b/lib/kokkos/core/src/Kokkos_Threads.hpp
index c9ebbf92652b5d9a2e859cf2587b8089897d3c62..f01b14724ac430924a253cb16bb4e57ec67348e3 100644
--- a/lib/kokkos/core/src/Kokkos_Threads.hpp
+++ b/lib/kokkos/core/src/Kokkos_Threads.hpp
@@ -189,6 +189,17 @@ public:
 namespace Kokkos {
 namespace Impl {
 
+template<>
+struct MemorySpaceAccess 
+  < Kokkos::Threads::memory_space
+  , Kokkos::Threads::scratch_memory_space
+  >
+{
+  enum { assignable = false };
+  enum { accessible = true };
+  enum { deepcopy   = false };
+};
+
 template<>
 struct VerifyExecutionCanAccessMemorySpace
   < Kokkos::Threads::memory_space
diff --git a/lib/kokkos/core/src/Kokkos_Timer.hpp b/lib/kokkos/core/src/Kokkos_Timer.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4eca5037e44408f5f54af173530b56c60c58e22a
--- /dev/null
+++ b/lib/kokkos/core/src/Kokkos_Timer.hpp
@@ -0,0 +1,112 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_TIMER_HPP
+#define KOKKOS_TIMER_HPP
+
+#include <stddef.h>
+
+#ifdef _MSC_VER
+#undef KOKKOS_USE_LIBRT
+#include <gettimeofday.c>
+#else
+#ifdef KOKKOS_USE_LIBRT
+#include <ctime>
+#else
+#include <sys/time.h>
+#endif
+#endif
+
+namespace Kokkos {
+
+/** \brief  Wall-clock seconds elapsed since construction or the last reset() */
+
+class Timer {
+private:
+  #ifdef KOKKOS_USE_LIBRT
+	struct timespec m_old;  // reference time recorded by reset()
+  #else
+	struct timeval m_old ;  // reference time recorded by reset()
+  #endif
+  Timer( const Timer & );  // declared private: not copyable from outside the class
+  Timer & operator = ( const Timer & );  // declared private: not assignable from outside the class
+public:
+
+  inline
+  void reset() {  // restart timing: overwrite m_old with the current time
+    #ifdef KOKKOS_USE_LIBRT
+	  clock_gettime(CLOCK_REALTIME, &m_old);
+    #else
+	  gettimeofday( & m_old , ((struct timezone *) NULL ) );
+    #endif
+  }
+
+  inline
+  ~Timer() {}
+
+  inline
+  Timer() { reset(); }  // timing starts at construction
+
+  inline
+  double seconds() const  // elapsed time between m_old and now, in seconds; m_old is not modified
+  {
+    #ifdef KOKKOS_USE_LIBRT
+      struct timespec m_new;
+      clock_gettime(CLOCK_REALTIME, &m_new);
+
+      return ( (double) ( m_new.tv_sec  - m_old.tv_sec ) ) +
+             ( (double) ( m_new.tv_nsec - m_old.tv_nsec ) * 1.0e-9 );  // nanosecond part scaled to seconds
+    #else
+      struct timeval m_new ;
+
+      gettimeofday( & m_new , ((struct timezone *) NULL ) );
+
+      return ( (double) ( m_new.tv_sec  - m_old.tv_sec ) ) +
+             ( (double) ( m_new.tv_usec - m_old.tv_usec ) * 1.0e-6 );  // microsecond part scaled to seconds
+    #endif
+  }
+};
+
+} // namespace Kokkos
+
+#endif /* #ifndef KOKKOS_TIMER_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp
index 1cc8b0338155c8f8be724181806097a927d606d2..b728b36492c0d318e32b025d97f05aaf70c98dd3 100644
--- a/lib/kokkos/core/src/Kokkos_View.hpp
+++ b/lib/kokkos/core/src/Kokkos_View.hpp
@@ -61,9 +61,6 @@ namespace Kokkos {
 namespace Experimental {
 namespace Impl {
 
-template< class DstMemorySpace , class SrcMemorySpace >
-struct DeepCopy ;
-
 template< class DataType >
 struct ViewArrayAnalysis ;
 
@@ -76,31 +73,23 @@ struct ViewDataAnalysis ;
 template< class , class ... >
 class ViewMapping { public: enum { is_assignable = false }; };
 
-template< class MemorySpace >
-struct ViewOperatorBoundsErrorAbort ;
+} /* namespace Impl */
+} /* namespace Experimental */
+} /* namespace Kokkos */
 
-template<>
-struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > {
-  static void apply( const size_t rank
-                   , const size_t n0 , const size_t n1
-                   , const size_t n2 , const size_t n3
-                   , const size_t n4 , const size_t n5
-                   , const size_t n6 , const size_t n7
-                   , const size_t i0 , const size_t i1
-                   , const size_t i2 , const size_t i3
-                   , const size_t i4 , const size_t i5
-                   , const size_t i6 , const size_t i7 );
-};
+namespace Kokkos {
+namespace Impl {
+
+using Kokkos::Experimental::Impl::ViewMapping ;
+using Kokkos::Experimental::Impl::ViewDataAnalysis ;
 
 } /* namespace Impl */
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 
 /** \class ViewTraits
  *  \brief Traits class for accessing attributes of a View.
@@ -168,8 +157,7 @@ struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_space<Space>::value
 
   typedef typename Space::execution_space                   execution_space ;
   typedef typename Space::memory_space                      memory_space ;
-  typedef typename Kokkos::Impl::is_space< Space >::host_mirror_space
-      HostMirrorSpace ;
+  typedef typename Kokkos::Impl::HostMirror< Space >::Space HostMirrorSpace ;
   typedef typename execution_space::array_layout            array_layout ;
   typedef typename ViewTraits<void,Prop...>::memory_traits  memory_traits ;
 };
@@ -225,7 +213,7 @@ private:
     std::conditional
       < ! std::is_same< typename prop::HostMirrorSpace , void >::value
       , typename prop::HostMirrorSpace
-      , typename Kokkos::Impl::is_space< ExecutionSpace >::host_mirror_space
+      , typename Kokkos::Impl::HostMirror< ExecutionSpace >::Space
       >::type
       HostMirrorSpace ;
 
@@ -238,7 +226,7 @@ private:
 
   // Analyze data type's properties,
   // May be specialized based upon the layout and value type
-  typedef Kokkos::Experimental::Impl::ViewDataAnalysis< DataType , ArrayLayout > data_analysis ;
+  typedef Kokkos::Impl::ViewDataAnalysis< DataType , ArrayLayout > data_analysis ;
 
 public:
 
@@ -376,31 +364,29 @@ public:
 template< class DataType , class ... Properties >
 class View ;
 
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-#include <impl/KokkosExp_ViewMapping.hpp>
-#include <impl/KokkosExp_ViewArray.hpp>
+#include <impl/Kokkos_ViewMapping.hpp>
+#include <impl/Kokkos_ViewArray.hpp>
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 
 namespace {
 
-constexpr Kokkos::Experimental::Impl::ALL_t
-  ALL = Kokkos::Experimental::Impl::ALL_t();
+constexpr Kokkos::Impl::ALL_t
+  ALL = Kokkos::Impl::ALL_t();
 
-constexpr Kokkos::Experimental::Impl::WithoutInitializing_t
-  WithoutInitializing = Kokkos::Experimental::Impl::WithoutInitializing_t();
+constexpr Kokkos::Impl::WithoutInitializing_t
+  WithoutInitializing = Kokkos::Impl::WithoutInitializing_t();
 
-constexpr Kokkos::Experimental::Impl::AllowPadding_t
-  AllowPadding        = Kokkos::Experimental::Impl::AllowPadding_t();
+constexpr Kokkos::Impl::AllowPadding_t
+  AllowPadding        = Kokkos::Impl::AllowPadding_t();
 
 }
 
@@ -446,14 +432,12 @@ view_wrap( Args const & ... args )
   return return_type( args... );
 }
 
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 
 template< class DataType , class ... Properties >
 class View ;
@@ -471,7 +455,7 @@ class View : public ViewTraits< DataType , Properties ... > {
 private:
 
   template< class , class ... > friend class View ;
-  template< class , class ... > friend class Impl::ViewMapping ;
+  template< class , class ... > friend class Kokkos::Impl::ViewMapping ;
 
 public:
 
@@ -479,8 +463,8 @@ public:
 
 private:
 
-  typedef Kokkos::Experimental::Impl::ViewMapping< traits , void > map_type ;
-  typedef Kokkos::Experimental::Impl::SharedAllocationTracker      track_type ;
+  typedef Kokkos::Impl::ViewMapping< traits , void > map_type ;
+  typedef Kokkos::Impl::SharedAllocationTracker      track_type ;
 
   track_type  m_track ;
   map_type    m_map ;
@@ -607,7 +591,7 @@ public:
   // Allow specializations to query their specialized map
 
   KOKKOS_INLINE_FUNCTION
-  const Kokkos::Experimental::Impl::ViewMapping< traits , void > &
+  const Kokkos::Impl::ViewMapping< traits , void > &
   implementation_map() const { return m_map ; }
 
   //----------------------------------------
@@ -629,18 +613,24 @@ private:
       ( is_layout_left || is_layout_right || is_layout_stride )
   };
 
+  template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space
+    { KOKKOS_FORCEINLINE_FUNCTION static void check() {} }; // primary template: flag defaults to MemorySpaceAccess<...>::accessible; check() does nothing
+  // Partial specialization chosen when the flag is false: check() calls Kokkos::abort with an error message.
+  template< class Space > struct verify_space<Space,false>
+    { KOKKOS_FORCEINLINE_FUNCTION static void check()
+        { Kokkos::abort("Kokkos::View ERROR: attempt to access inaccessible memory space"); };
+    };
+
 #if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
 
 #define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
-  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \
-    < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \
-  Kokkos::Experimental::Impl::view_verify_operator_bounds ARG ;
+  View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
+  Kokkos::Impl::view_verify_operator_bounds ARG ;
 
 #else
 
 #define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
-  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \
-    < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify();
+  View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
 
 #endif
 
@@ -656,7 +646,11 @@ public:
                           ), reference_type >::type
   operator()( Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
+      #endif
 
       return m_map.reference();
     }
@@ -675,7 +669,11 @@ public:
   operator()( const I0 & i0
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
+      #endif
 
       return m_map.reference(i0);
     }
@@ -692,7 +690,12 @@ public:
   operator()( const I0 & i0
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,args...) )
+
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
+      #endif
 
       return m_map.m_handle[ i0 ];
     }
@@ -709,7 +712,11 @@ public:
   operator()( const I0 & i0
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
+      #endif
 
       return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
     }
@@ -726,7 +733,11 @@ public:
     ), reference_type >::type
   operator[]( const I0 & i0 ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
+      #endif
 
       return m_map.reference(i0);
     }
@@ -741,7 +752,11 @@ public:
     ), reference_type >::type
   operator[]( const I0 & i0 ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
+      #endif
 
       return m_map.m_handle[ i0 ];
     }
@@ -756,7 +771,11 @@ public:
     ), reference_type >::type
   operator[]( const I0 & i0 ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
+      #endif
 
       return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
     }
@@ -775,7 +794,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
+      #endif
 
       return m_map.reference(i0,i1);
     }
@@ -792,7 +815,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
+      #endif
 
       return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
     }
@@ -809,7 +836,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
+      #endif
 
       return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
     }
@@ -826,7 +857,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
+      #endif
 
       return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
     }
@@ -843,7 +878,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
+      #endif
 
       return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
     }
@@ -860,7 +899,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
+      #endif
 
       return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
                              i1 * m_map.m_offset.m_stride.S1 ];
@@ -880,7 +923,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
+      #endif
 
       return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
     }
@@ -896,7 +943,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
+      #endif
 
       return m_map.reference(i0,i1,i2);
     }
@@ -915,7 +966,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
+      #endif
 
       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
     }
@@ -931,7 +986,11 @@ public:
   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
+      #endif
 
       return m_map.reference(i0,i1,i2,i3);
     }
@@ -952,7 +1011,11 @@ public:
             , const I4 & i4
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
+      #endif
 
       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
     }
@@ -970,7 +1033,11 @@ public:
             , const I4 & i4
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
+      #endif
 
       return m_map.reference(i0,i1,i2,i3,i4);
     }
@@ -991,7 +1058,11 @@ public:
             , const I4 & i4 , const I5 & i5
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
+      #endif
 
       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
     }
@@ -1009,7 +1080,11 @@ public:
             , const I4 & i4 , const I5 & i5
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
+      #endif
 
       return m_map.reference(i0,i1,i2,i3,i4,i5);
     }
@@ -1030,7 +1105,11 @@ public:
             , const I4 & i4 , const I5 & i5 , const I6 & i6
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
+      #endif
 
       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
     }
@@ -1048,7 +1127,11 @@ public:
             , const I4 & i4 , const I5 & i5 , const I6 & i6
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
+      #endif
 
       return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
     }
@@ -1069,7 +1152,11 @@ public:
             , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
+      #endif
 
       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
     }
@@ -1087,7 +1174,11 @@ public:
             , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7
             , Args ... args ) const
     {
-      KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
+      #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+        KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
+      #else
+        KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
+      #endif
 
       return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
     }
@@ -1126,7 +1217,7 @@ public:
     , m_map()
     {
       typedef typename View<RT,RP...>::traits  SrcTraits ;
-      typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible View copy construction" );
       Mapping::assign( m_map , rhs.m_map , rhs.m_track );
     }
@@ -1136,7 +1227,7 @@ public:
   View & operator = ( const View<RT,RP...> & rhs )
     {
       typedef typename View<RT,RP...>::traits  SrcTraits ;
-      typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible View copy assignment" );
       Mapping::assign( m_map , rhs.m_map , rhs.m_track );
       m_track.assign( rhs.m_track , traits::is_managed );
@@ -1156,14 +1247,14 @@ public:
     {
       typedef View< RT , RP... > SrcType ;
 
-      typedef Kokkos::Experimental::Impl::ViewMapping
+      typedef Kokkos::Impl::ViewMapping
         < void /* deduce destination view type from source view traits */
         , typename SrcType::traits
         , Arg0 , Args... > Mapping ;
 
       typedef typename Mapping::type DstType ;
 
-      static_assert( Kokkos::Experimental::Impl::ViewMapping< traits , typename DstType::traits , void >::is_assignable
+      static_assert( Kokkos::Impl::ViewMapping< traits , typename DstType::traits , void >::is_assignable
         , "Subview construction requires compatible view and subview arguments" );
 
       Mapping::assign( m_map, src_view.m_map, arg0 , args... );
@@ -1243,7 +1334,7 @@ public:
 #endif
 //------------------------------------------------------------
 
-      Kokkos::Experimental::Impl::SharedAllocationRecord<> *
+      Kokkos::Impl::SharedAllocationRecord<> *
         record = m_map.allocate_shared( prop , arg_layout );
 
 //------------------------------------------------------------
@@ -1324,7 +1415,7 @@ public:
   explicit inline
   View( const Label & arg_label
       , typename std::enable_if<
-          Kokkos::Experimental::Impl::is_view_label<Label>::value ,
+          Kokkos::Impl::is_view_label<Label>::value ,
           typename traits::array_layout >::type const & arg_layout
       )
     : View( Impl::ViewCtorProp< std::string >( arg_label ) , arg_layout )
@@ -1335,7 +1426,7 @@ public:
   explicit inline
   View( const Label & arg_label
       , typename std::enable_if<
-          Kokkos::Experimental::Impl::is_view_label<Label>::value ,
+          Kokkos::Impl::is_view_label<Label>::value ,
         const size_t >::type arg_N0 = 0
       , const size_t arg_N1 = 0
       , const size_t arg_N2 = 0
@@ -1357,7 +1448,7 @@ public:
   View( const ViewAllocateWithoutInitializing & arg_prop
       , const typename traits::array_layout & arg_layout
       )
-    : View( Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing )
+    : View( Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing )
           , arg_layout
           )
     {}
@@ -1373,7 +1464,7 @@ public:
       , const size_t arg_N6 = 0
       , const size_t arg_N7 = 0
       )
-    : View( Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing )
+    : View( Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing )
           , typename traits::array_layout
               ( arg_N0 , arg_N1 , arg_N2 , arg_N3
               , arg_N4 , arg_N5 , arg_N6 , arg_N7 )
@@ -1499,7 +1590,7 @@ public:
 
 template< class V , class ... Args >
 using Subview =
-  typename Kokkos::Experimental::Impl::ViewMapping
+  typename Kokkos::Impl::ViewMapping
     < void /* deduce subview type from source view traits */
     , typename V::traits
     , Args ...
@@ -1507,7 +1598,7 @@ using Subview =
 
 template< class D, class ... P , class ... Args >
 KOKKOS_INLINE_FUNCTION
-typename Kokkos::Experimental::Impl::ViewMapping
+typename Kokkos::Impl::ViewMapping
   < void /* deduce subview type from source view traits */
   , ViewTraits< D , P... >
   , Args ...
@@ -1518,7 +1609,7 @@ subview( const View< D, P... > & src , Args ... args )
     "subview requires one argument for each source View rank" );
 
   return typename
-    Kokkos::Experimental::Impl::ViewMapping
+    Kokkos::Impl::ViewMapping
       < void /* deduce subview type from source view traits */
       , ViewTraits< D , P ... >
       , Args ... >::type( src , args ... );
@@ -1526,7 +1617,7 @@ subview( const View< D, P... > & src , Args ... args )
 
 template< class MemoryTraits , class D, class ... P , class ... Args >
 KOKKOS_INLINE_FUNCTION
-typename Kokkos::Experimental::Impl::ViewMapping
+typename Kokkos::Impl::ViewMapping
   < void /* deduce subview type from source view traits */
   , ViewTraits< D , P... >
   , Args ...
@@ -1537,7 +1628,7 @@ subview( const View< D, P... > & src , Args ... args )
     "subview requires one argument for each source View rank" );
 
   return typename
-    Kokkos::Experimental::Impl::ViewMapping
+    Kokkos::Impl::ViewMapping
       < void /* deduce subview type from source view traits */
       , ViewTraits< D , P ... >
       , Args ... >
@@ -1545,16 +1636,12 @@ subview( const View< D, P... > & src , Args ... args )
       ::type( src , args ... );
 }
 
-
-
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 
 template< class LT , class ... LP , class RT , class ... RP >
 KOKKOS_INLINE_FUNCTION
@@ -1593,7 +1680,6 @@ bool operator != ( const View<LT,LP...> & lhs ,
   return ! ( operator==(lhs,rhs) );
 }
 
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
@@ -1604,11 +1690,11 @@ namespace Impl {
 
 inline
 void shared_allocation_tracking_claim_and_disable()
-{ Kokkos::Experimental::Impl::SharedAllocationRecord<void,void>::tracking_claim_and_disable(); }
+{ Kokkos::Impl::SharedAllocationRecord<void,void>::tracking_claim_and_disable(); }
 
 inline
 void shared_allocation_tracking_release_and_enable()
-{ Kokkos::Experimental::Impl::SharedAllocationRecord<void,void>::tracking_release_and_enable(); }
+{ Kokkos::Impl::SharedAllocationRecord<void,void>::tracking_release_and_enable(); }
 
 } /* namespace Impl */
 } /* namespace Kokkos */
@@ -1617,7 +1703,6 @@ void shared_allocation_tracking_release_and_enable()
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 template< class OutputView , typename Enable = void >
@@ -1719,14 +1804,12 @@ struct ViewRemap {
 };
 
 } /* namespace Impl */
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 
 /** \brief  Deep copy a value from Host memory into a view.  */
 template< class DT , class ... DP >
@@ -1743,7 +1826,7 @@ void deep_copy
                   typename ViewTraits<DT,DP...>::value_type >::value
     , "deep_copy requires non-const type" );
 
-  Kokkos::Experimental::Impl::ViewFill< View<DT,DP...> >( dst , value );
+  Kokkos::Impl::ViewFill< View<DT,DP...> >( dst , value );
 }
 
 /** \brief  Deep copy into a value in Host memory from a view.  */
@@ -1830,10 +1913,10 @@ void deep_copy
   typedef typename src_type::memory_space     src_memory_space ;
 
   enum { DstExecCanAccessSrc =
-   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
+   Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible };
 
   enum { SrcExecCanAccessDst =
-   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value };
+   Kokkos::Impl::SpaceAccessibility< src_execution_space , dst_memory_space >::accessible };
 
 
   if ( (void *) dst.data() != (void*) src.data() ) {
@@ -1916,11 +1999,11 @@ void deep_copy
     }
     else if ( DstExecCanAccessSrc ) {
       // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
-      Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src );
+      Kokkos::Impl::ViewRemap< dst_type , src_type >( dst , src );
     }
     else if ( SrcExecCanAccessDst ) {
       // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
-      Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src );
+      Kokkos::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src );
     }
     else {
       Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation");
@@ -1928,14 +2011,12 @@ void deep_copy
   }
 }
 
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 
 /** \brief  Deep copy a value from Host memory into a view.  */
 template< class ExecSpace ,class DT , class ... DP >
@@ -1954,7 +2035,7 @@ void deep_copy
                   typename ViewTraits<DT,DP...>::value_type >::value
     , "deep_copy requires non-const type" );
 
-  Kokkos::Experimental::Impl::ViewFill< View<DT,DP...> >( dst , value );
+  Kokkos::Impl::ViewFill< View<DT,DP...> >( dst , value );
 }
 
 /** \brief  Deep copy into a value in Host memory from a view.  */
@@ -2049,10 +2130,10 @@ void deep_copy
   typedef typename src_type::memory_space     src_memory_space ;
 
   enum { DstExecCanAccessSrc =
-   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
+   Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible };
 
   enum { SrcExecCanAccessDst =
-   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value };
+   Kokkos::Impl::SpaceAccessibility< src_execution_space , dst_memory_space >::accessible };
 
   if ( (void *) dst.data() != (void*) src.data() ) {
 
@@ -2089,11 +2170,11 @@ void deep_copy
     }
     else if ( DstExecCanAccessSrc ) {
       // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
-      Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src );
+      Kokkos::Impl::ViewRemap< dst_type , src_type >( dst , src );
     }
     else if ( SrcExecCanAccessDst ) {
       // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
-      Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src );
+      Kokkos::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src );
     }
     else {
       Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation");
@@ -2101,21 +2182,19 @@ void deep_copy
   }
 }
 
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 // Deduce Mirror Types
 template<class Space, class T, class ... P>
 struct MirrorViewType {
   // The incoming view_type
-  typedef typename Kokkos::Experimental::View<T,P...> src_view_type;
+  typedef typename Kokkos::View<T,P...> src_view_type;
   // The memory space for the mirror view
   typedef typename Space::memory_space memory_space;
   // Check whether it is the same memory space
@@ -2125,7 +2204,7 @@ struct MirrorViewType {
   // The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
   typedef typename src_view_type::non_const_data_type data_type;
   // The destination view type if it is not the same memory space
-  typedef Kokkos::Experimental::View<data_type,array_layout,Space> dest_view_type;
+  typedef Kokkos::View<data_type,array_layout,Space> dest_view_type;
   // If it is the same memory_space return the existsing view_type
   // This will also keep the unmanaged trait if necessary
   typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type;
@@ -2134,7 +2213,7 @@ struct MirrorViewType {
 template<class Space, class T, class ... P>
 struct MirrorType {
   // The incoming view_type
-  typedef typename Kokkos::Experimental::View<T,P...> src_view_type;
+  typedef typename Kokkos::View<T,P...> src_view_type;
   // The memory space for the mirror view
   typedef typename Space::memory_space memory_space;
   // Check whether it is the same memory space
@@ -2144,17 +2223,17 @@ struct MirrorType {
   // The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
   typedef typename src_view_type::non_const_data_type data_type;
   // The destination view type if it is not the same memory space
-  typedef Kokkos::Experimental::View<data_type,array_layout,Space> view_type;
+  typedef Kokkos::View<data_type,array_layout,Space> view_type;
 };
 
 }
 
 template< class T , class ... P >
 inline
-typename Kokkos::Experimental::View<T,P...>::HostMirror
-create_mirror( const Kokkos::Experimental::View<T,P...> & src
+typename Kokkos::View<T,P...>::HostMirror
+create_mirror( const Kokkos::View<T,P...> & src
              , typename std::enable_if<
-                 ! std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout
+                 ! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
                                , Kokkos::LayoutStride >::value
                >::type * = 0
              )
@@ -2175,10 +2254,10 @@ create_mirror( const Kokkos::Experimental::View<T,P...> & src
 
 template< class T , class ... P >
 inline
-typename Kokkos::Experimental::View<T,P...>::HostMirror
-create_mirror( const Kokkos::Experimental::View<T,P...> & src
+typename Kokkos::View<T,P...>::HostMirror
+create_mirror( const Kokkos::View<T,P...> & src
              , typename std::enable_if<
-                 std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout
+                 std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
                              , Kokkos::LayoutStride >::value
                >::type * = 0
              )
@@ -2212,21 +2291,21 @@ create_mirror( const Kokkos::Experimental::View<T,P...> & src
 
 // Create a mirror in a new space (specialization for different space)
 template<class Space, class T, class ... P>
-typename Impl::MirrorType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::Experimental::View<T,P...> & src) {
+typename Impl::MirrorType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::View<T,P...> & src) {
   return typename Impl::MirrorType<Space,T,P ...>::view_type(src.label(),src.layout());
 }
 
 template< class T , class ... P >
 inline
-typename Kokkos::Experimental::View<T,P...>::HostMirror
-create_mirror_view( const Kokkos::Experimental::View<T,P...> & src
+typename Kokkos::View<T,P...>::HostMirror
+create_mirror_view( const Kokkos::View<T,P...> & src
                   , typename std::enable_if<(
-                      std::is_same< typename Kokkos::Experimental::View<T,P...>::memory_space
-                                  , typename Kokkos::Experimental::View<T,P...>::HostMirror::memory_space
+                      std::is_same< typename Kokkos::View<T,P...>::memory_space
+                                  , typename Kokkos::View<T,P...>::HostMirror::memory_space
                                   >::value
                       &&
-                      std::is_same< typename Kokkos::Experimental::View<T,P...>::data_type
-                                  , typename Kokkos::Experimental::View<T,P...>::HostMirror::data_type
+                      std::is_same< typename Kokkos::View<T,P...>::data_type
+                                  , typename Kokkos::View<T,P...>::HostMirror::data_type
                                   >::value
                     )>::type * = 0
                   )
@@ -2236,26 +2315,26 @@ create_mirror_view( const Kokkos::Experimental::View<T,P...> & src
 
 template< class T , class ... P >
 inline
-typename Kokkos::Experimental::View<T,P...>::HostMirror
-create_mirror_view( const Kokkos::Experimental::View<T,P...> & src
+typename Kokkos::View<T,P...>::HostMirror
+create_mirror_view( const Kokkos::View<T,P...> & src
                   , typename std::enable_if< ! (
-                      std::is_same< typename Kokkos::Experimental::View<T,P...>::memory_space
-                                  , typename Kokkos::Experimental::View<T,P...>::HostMirror::memory_space
+                      std::is_same< typename Kokkos::View<T,P...>::memory_space
+                                  , typename Kokkos::View<T,P...>::HostMirror::memory_space
                                   >::value
                       &&
-                      std::is_same< typename Kokkos::Experimental::View<T,P...>::data_type
-                                  , typename Kokkos::Experimental::View<T,P...>::HostMirror::data_type
+                      std::is_same< typename Kokkos::View<T,P...>::data_type
+                                  , typename Kokkos::View<T,P...>::HostMirror::data_type
                                   >::value
                     )>::type * = 0
                   )
 {
-  return Kokkos::Experimental::create_mirror( src );
+  return Kokkos::create_mirror( src );
 }
 
 // Create a mirror view in a new space (specialization for same space)
 template<class Space, class T, class ... P>
 typename Impl::MirrorViewType<Space,T,P ...>::view_type
-create_mirror_view(const Space& , const Kokkos::Experimental::View<T,P...> & src
+create_mirror_view(const Space& , const Kokkos::View<T,P...> & src
   , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
   return src;
 }
@@ -2263,24 +2342,26 @@ create_mirror_view(const Space& , const Kokkos::Experimental::View<T,P...> & src
 // Create a mirror view in a new space (specialization for different space)
 template<class Space, class T, class ... P>
 typename Impl::MirrorViewType<Space,T,P ...>::view_type
-create_mirror_view(const Space& , const Kokkos::Experimental::View<T,P...> & src
+create_mirror_view(const Space& , const Kokkos::View<T,P...> & src
   , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
   return typename Impl::MirrorViewType<Space,T,P ...>::view_type(src.label(),src.layout());
 }
 
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 
 /** \brief  Resize a view with copying old data to new data at the corresponding indices. */
 template< class T , class ... P >
 inline
-void resize( Kokkos::Experimental::View<T,P...> & v ,
+typename std::enable_if<
+  std::is_same<typename Kokkos::View<T,P...>::array_layout,Kokkos::LayoutLeft>::value ||
+  std::is_same<typename Kokkos::View<T,P...>::array_layout,Kokkos::LayoutRight>::value
+>::type
+resize( Kokkos::View<T,P...> & v ,
              const size_t n0 = 0 ,
              const size_t n1 = 0 ,
              const size_t n2 = 0 ,
@@ -2290,13 +2371,13 @@ void resize( Kokkos::Experimental::View<T,P...> & v ,
              const size_t n6 = 0 ,
              const size_t n7 = 0 )
 {
-  typedef Kokkos::Experimental::View<T,P...>  view_type ;
+  typedef Kokkos::View<T,P...>  view_type ;
 
-  static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
+  static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
 
   view_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6, n7 );
 
-  Kokkos::Experimental::Impl::ViewRemap< view_type , view_type >( v_resized , v );
+  Kokkos::Impl::ViewRemap< view_type , view_type >( v_resized , v );
 
   v = v_resized ;
 }
@@ -2304,7 +2385,28 @@ void resize( Kokkos::Experimental::View<T,P...> & v ,
 /** \brief  Resize a view with copying old data to new data at the corresponding indices. */
 template< class T , class ... P >
 inline
-void realloc( Kokkos::Experimental::View<T,P...> & v ,
+void resize(       Kokkos::View<T,P...> & v ,
+    const typename Kokkos::View<T,P...>::array_layout & layout)
+{
+  typedef Kokkos::View<T,P...>  view_type ;
+
+  static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
+
+  view_type v_resized( v.label(), layout );
+
+  Kokkos::Impl::ViewRemap< view_type , view_type >( v_resized , v );
+
+  v = v_resized ;
+}
+
+/** \brief  Reallocate a view, discarding its old data. */
+template< class T , class ... P >
+inline
+typename std::enable_if<
+  std::is_same<typename Kokkos::View<T,P...>::array_layout,Kokkos::LayoutLeft>::value ||
+  std::is_same<typename Kokkos::View<T,P...>::array_layout,Kokkos::LayoutRight>::value
+>::type
+realloc( Kokkos::View<T,P...> & v ,
               const size_t n0 = 0 ,
               const size_t n1 = 0 ,
               const size_t n2 = 0 ,
@@ -2314,9 +2416,9 @@ void realloc( Kokkos::Experimental::View<T,P...> & v ,
               const size_t n6 = 0 ,
               const size_t n7 = 0 )
 {
-  typedef Kokkos::Experimental::View<T,P...>  view_type ;
+  typedef Kokkos::View<T,P...>  view_type ;
 
-  static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" );
+  static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" );
 
   const std::string label = v.label();
 
@@ -2324,40 +2426,65 @@ void realloc( Kokkos::Experimental::View<T,P...> & v ,
   v = view_type( label, n0, n1, n2, n3, n4, n5, n6, n7 );
 }
 
-} /* namespace Experimental */
+/** \brief  Reallocate a view, discarding its old data. */
+template< class T , class ... P >
+inline
+void realloc(      Kokkos::View<T,P...> & v ,
+    const typename Kokkos::View<T,P...>::array_layout & layout)
+{
+  typedef Kokkos::View<T,P...>  view_type ;
+
+  static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" );
+
+  const std::string label = v.label();
+
+  v = view_type(); // Deallocate first, if the only view to allocation
+  v = view_type( label, layout );
+}
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
+// For backward compatibility:
 
 namespace Kokkos {
+namespace Experimental {
 
-template< class D , class ... P >
-using ViewTraits = Kokkos::Experimental::ViewTraits<D,P...> ;
+using Kokkos::ViewTraits ;
+using Kokkos::View ;
+using Kokkos::Subview ;
+using Kokkos::is_view ;
+using Kokkos::subview ;
+using Kokkos::ALL ;
+using Kokkos::WithoutInitializing ;
+using Kokkos::AllowPadding ;
+using Kokkos::view_alloc ;
+using Kokkos::view_wrap ;
+using Kokkos::deep_copy ;
+using Kokkos::create_mirror ;
+using Kokkos::create_mirror_view ;
+using Kokkos::resize ;
+using Kokkos::realloc ;
 
-using Experimental::View ; //modified due to gcc parser bug 
-//template< class D , class ... P >
-//using View = Kokkos::Experimental::View<D,P...> ;
+namespace Impl {
 
-using Kokkos::Experimental::ALL ;
-using Kokkos::Experimental::WithoutInitializing ;
-using Kokkos::Experimental::AllowPadding ;
-using Kokkos::Experimental::view_alloc ;
-using Kokkos::Experimental::view_wrap ;
+using Kokkos::Impl::ViewFill ;
+using Kokkos::Impl::ViewRemap ;
+using Kokkos::Impl::ViewCtorProp ;
+using Kokkos::Impl::is_view_label ;
+using Kokkos::Impl::WithoutInitializing_t ;
+using Kokkos::Impl::AllowPadding_t ;
+using Kokkos::Impl::SharedAllocationRecord ;
+using Kokkos::Impl::SharedAllocationTracker ;
 
-using Kokkos::Experimental::deep_copy ;
-using Kokkos::Experimental::create_mirror ;
-using Kokkos::Experimental::create_mirror_view ;
-using Kokkos::Experimental::subview ;
-using Kokkos::Experimental::resize ;
-using Kokkos::Experimental::realloc ;
-using Kokkos::Experimental::is_view ;
+} /* namespace Impl */
+} /* namespace Experimental */
+} /* namespace Kokkos */
 
+namespace Kokkos {
 namespace Impl {
 
-using Kokkos::Experimental::is_view ;
-
-class ViewDefault {};
+using Kokkos::is_view ;
 
 template< class SrcViewType
         , class Arg0Type
@@ -2371,8 +2498,7 @@ template< class SrcViewType
         >
 struct ViewSubview /* { typedef ... type ; } */ ;
 
-}
-
+} /* namespace Impl */
 } /* namespace Kokkos */
 
 #include <impl/Kokkos_Atomic_View.hpp>
diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile
index dc27d341ac8ee4a40150bc93476b994666189739..316f61fd4d9fcd4c7ce4ec37592659deef006bce 100644
--- a/lib/kokkos/core/src/Makefile
+++ b/lib/kokkos/core/src/Makefile
@@ -1,24 +1,25 @@
-KOKKOS_PATH = ../..
+ifndef KOKKOS_PATH
+  MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+  KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH))../..
+endif
 
 PREFIX ?= /usr/local/lib/kokkos
 
 default: messages build-lib
 	echo "End Build"
 
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(NVCC_WRAPPER)
-	LINKFLAGS ?= 
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+  CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
 else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= g++
-	LINKFLAGS ?=  
+  CXX = g++
 endif
 
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?= 
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 PWD = $(shell pwd)
 
 KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
@@ -49,6 +50,16 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
 	CONDITIONAL_COPIES += copy-openmp
 endif
 
+ifeq ($(KOKKOS_OS),CYGWIN)
+  COPY_FLAG = -u
+endif
+ifeq ($(KOKKOS_OS),Linux)
+  COPY_FLAG = -u
+endif
+ifeq ($(KOKKOS_OS),Darwin)
+  COPY_FLAG =
+endif
+
 messages: 
 	echo "Start Build"
 
@@ -77,6 +88,15 @@ build-makefile-kokkos:
 	echo "KOKKOS_LINK_DEPENDS  = $(KOKKOS_LINK_DEPENDS)" >> Makefile.kokkos
 	echo "KOKKOS_LIBS = $(KOKKOS_LIBS)" >> Makefile.kokkos
 	echo "KOKKOS_LDFLAGS = $(KOKKOS_LDFLAGS)" >> Makefile.kokkos
+	echo "" >> Makefile.kokkos
+	echo "#Internal settings which need to propagated for Kokkos examples" >> Makefile.kokkos
+	echo "KOKKOS_INTERNAL_USE_CUDA = ${KOKKOS_INTERNAL_USE_CUDA}" >> Makefile.kokkos
+	echo "KOKKOS_INTERNAL_USE_OPENMP = ${KOKKOS_INTERNAL_USE_OPENMP}" >> Makefile.kokkos
+	echo "KOKKOS_INTERNAL_USE_PTHREADS = ${KOKKOS_INTERNAL_USE_PTHREADS}" >> Makefile.kokkos
+	echo "" >> Makefile.kokkos
+	echo "#Fake kokkos-clean target" >> Makefile.kokkos
+	echo "kokkos-clean:" >> Makefile.kokkos
+	echo "" >> Makefile.kokkos
 	sed \
 		-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
 		-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
@@ -98,27 +118,27 @@ mkdir:
 
 copy-cuda: mkdir
 	mkdir -p $(PREFIX)/include/Cuda
-	cp $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda
+	cp $(COPY_FLAG) $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda
 
 copy-threads: mkdir
 	mkdir -p $(PREFIX)/include/Threads
-	cp $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads
+	cp $(COPY_FLAG) $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads
 
 copy-qthread: mkdir
 	mkdir -p $(PREFIX)/include/Qthread
-	cp $(KOKKOS_HEADERS_QTHREAD) $(PREFIX)/include/Qthread
+	cp $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREAD) $(PREFIX)/include/Qthread
 
 copy-openmp: mkdir
 	mkdir -p $(PREFIX)/include/OpenMP
-	cp $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP
+	cp $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP
 
 install: mkdir $(CONDITIONAL_COPIES) build-lib 
-	cp $(NVCC_WRAPPER) $(PREFIX)/bin
-	cp $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include
-	cp $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl
-	cp Makefile.kokkos $(PREFIX)
-	cp libkokkos.a $(PREFIX)/lib
-	cp KokkosCore_config.h $(PREFIX)/include
+	cp $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin
+	cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include
+	cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl
+	cp $(COPY_FLAG) Makefile.kokkos $(PREFIX)
+	cp $(COPY_FLAG) libkokkos.a $(PREFIX)/lib
+	cp $(COPY_FLAG) KokkosCore_config.h $(PREFIX)/include
 
 clean: kokkos-clean
 	rm -f Makefile.kokkos
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
index 3e22033f7c058dc6c084c445685c80beb8620da8..00a9957ee1b4291c48d1cac4d44c7669268b36ff 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
@@ -43,7 +43,7 @@
 
 #include <Kokkos_Core.hpp>
 
-#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
 
 #include <impl/Kokkos_TaskQueue_impl.hpp>
 
@@ -324,6 +324,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::
 
 //----------------------------------------------------------------------------
 
-#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
 
 
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp
index 2761247c40c930d1b454acfc373be2c8d8aaf4a3..15dbb77c26c7432497417b0b27508b00d3d717af 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp
@@ -44,7 +44,7 @@
 #ifndef KOKKOS_IMPL_OPENMP_TASK_HPP
 #define KOKKOS_IMPL_OPENMP_TASK_HPP
 
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_ENABLE_TASKDAG )
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
@@ -156,21 +156,30 @@ template<typename iType>
 KOKKOS_INLINE_FUNCTION
 Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >
 TeamThreadRange
-  ( Impl::TaskExec< Kokkos::OpenMP > & thread
-  , const iType & count )
+  ( Impl::TaskExec< Kokkos::OpenMP > & thread, const iType & count )
 {
   return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >(thread,count);
 }
 
-template<typename iType>
+template<typename iType1, typename iType2>
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > >
+Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
+                                       Impl::TaskExec< Kokkos::OpenMP > >
 TeamThreadRange
-  ( Impl:: TaskExec< Kokkos::OpenMP > & thread
-  , const iType & start
-  , const iType & end )
+  ( Impl:: TaskExec< Kokkos::OpenMP > & thread, const iType1 & begin, const iType2 & end )
+{
+  typedef typename std::common_type<iType1, iType2>::type iType;
+  return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::TaskExec< Kokkos::OpenMP > >(thread, begin, end);
+}
+
+template<typename iType>
+KOKKOS_INLINE_FUNCTION
+Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >
+ThreadVectorRange
+  ( Impl::TaskExec< Kokkos::OpenMP > & thread
+  , const iType & count )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > >(thread,start,end);
+  return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >(thread,count);
 }
 
 /** \brief  Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
@@ -351,6 +360,6 @@ void parallel_scan
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
 #endif /* #ifndef KOKKOS_IMPL_OPENMP_TASK_HPP */
 
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp
index 7d06a2f66149f93bd43d6a4976ae9060b8833997..25e7d89277fe3dc46df093235a3195ef6bdeedd8 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp
@@ -300,12 +300,12 @@ void OpenMP::initialize( unsigned thread_count ,
   }
 
   // Check for over-subscription
-  if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) {
-    std::cout << "Kokkos::OpenMP::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl;
-    std::cout << "                                    Detected: " << Impl::processors_per_node() << " cores per node." << std::endl;
-    std::cout << "                                    Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." << std::endl;
-    std::cout << "                                    Requested: " << thread_count << " threads per process." << std::endl;
-  }
+  //if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) {
+  //  std::cout << "Kokkos::OpenMP::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl;
+  //  std::cout << "                                    Detected: " << Impl::processors_per_node() << " cores per node." << std::endl;
+  //  std::cout << "                                    Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." << std::endl;
+  //  std::cout << "                                    Requested: " << thread_count << " threads per process." << std::endl;
+  //}
   // Init the array for used for arbitrarily sized atomics
   Impl::init_lock_array_host_space();
 
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp
index a01c9cb644e86f423409f1eeb56a014b68f87968..a2bfa742d1bad422af65b2dd712f9370b39ddf61 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
@@ -180,7 +180,7 @@ public:
     bool success = false;
     while(!success) {
       work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new);
-      success = ( (work_range_new == work_range_old) || 
+      success = ( (work_range_new == work_range_old) ||
                   (work_range_new.first>=work_range_new.second));
       work_range_old = work_range_new;
       work_range_new.first+=1;
@@ -393,12 +393,14 @@ public:
     typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE
                          , ValueType , void >::type type ;
 
-    type * const local_value = ((type*) m_exec.scratch_thread());
-    if(team_rank() == thread_id)
-      *local_value = value;
+    type volatile * const shared_value =
+      ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread());
+
+    if ( team_rank() == thread_id ) *shared_value = value;
     memory_fence();
-    team_barrier();
-    value = *local_value;
+    team_barrier(); // Wait for 'thread_id' to write
+    value = *shared_value ;
+    team_barrier(); // Wait for team members to read
 #endif
   }
 
@@ -655,8 +657,6 @@ public:
   static inline int team_reduce_size() { return TEAM_REDUCE_SIZE ; }
 };
 
-
-
 template< class ... Properties >
 class TeamPolicyInternal< Kokkos::OpenMP, Properties ... >: public PolicyTraits<Properties ...>
 {
@@ -740,9 +740,9 @@ public:
 
   inline int team_size()   const { return m_team_size ; }
   inline int league_size() const { return m_league_size ; }
+
   inline size_t scratch_size(const int& level, int team_size_ = -1) const {
-    if(team_size_ < 0)
-      team_size_ = m_team_size;
+    if(team_size_ < 0) team_size_ = m_team_size;
     return m_team_scratch_size[level] + team_size_*m_thread_scratch_size[level] ;
   }
 
@@ -840,7 +840,6 @@ public:
 };
 } // namespace Impl
 
-
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
@@ -864,29 +863,26 @@ int OpenMP::thread_pool_rank()
 #endif
 }
 
-} // namespace Kokkos
-
-
-namespace Kokkos {
-
-template<typename iType>
+template< typename iType >
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>
-  TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,count);
+Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember >
+TeamThreadRange( const Impl::OpenMPexecTeamMember& thread, const iType& count ) {
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember >( thread, count );
 }
 
-template<typename iType>
+template< typename iType1, typename iType2 >
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>
-  TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& begin, const iType& end) {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,begin,end);
+Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
+                                       Impl::OpenMPexecTeamMember >
+TeamThreadRange( const Impl::OpenMPexecTeamMember& thread, const iType1& begin, const iType2& end ) {
+  typedef typename std::common_type< iType1, iType2 >::type iType;
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember >( thread, iType(begin), iType(end) );
 }
 
 template<typename iType>
 KOKKOS_INLINE_FUNCTION
 Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >
-  ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) {
+ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) {
   return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >(thread,count);
 }
 
@@ -899,6 +895,7 @@ KOKKOS_INLINE_FUNCTION
 Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember> PerThread(const Impl::OpenMPexecTeamMember& thread) {
   return Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>(thread);
 }
+
 } // namespace Kokkos
 
 namespace Kokkos {
@@ -959,7 +956,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Ope
 
 } //namespace Kokkos
 
-
 namespace Kokkos {
 /** \brief  Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
  *
@@ -1080,4 +1076,3 @@ void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_s
 }
 
 #endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */
-
diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp
index 5b6419289fc4874f1d97034aa7decd9be0eca147..8ee70b9efa6220a93494472cc67b73641886f9ed 100644
--- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp
+++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp
@@ -511,6 +511,7 @@ public:
 };
 
 } // namespace Impl
+
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
@@ -518,26 +519,24 @@ public:
 
 namespace Kokkos {
 
-template<typename iType>
+template< typename iType >
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>
-TeamThreadRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count)
+Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember >
+TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread, const iType& count )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,count);
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember >( thread, count );
 }
 
-template<typename iType>
+template< typename iType1, typename iType2 >
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>
-TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread
-               , const iType & begin
-               , const iType & end
-               )
+Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
+                                       Impl::QthreadTeamPolicyMember >
+TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread, const iType1 & begin, const iType2 & end )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,begin,end);
+  typedef typename std::common_type< iType1, iType2 >::type iType;
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember >( thread, iType(begin), iType(end) );
 }
 
-
 template<typename iType>
 KOKKOS_INLINE_FUNCTION
 Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >
@@ -545,7 +544,6 @@ Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >
   return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >(thread,count);
 }
 
-
 KOKKOS_INLINE_FUNCTION
 Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember> PerTeam(const Impl::QthreadTeamPolicyMember& thread) {
   return Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>(thread);
@@ -556,14 +554,10 @@ Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember> PerThread(const Impl::Qt
   return Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>(thread);
 }
 
-} // namespace Kokkos
-
-namespace Kokkos {
-
-  /** \brief  Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
-   *
-   * The range i=0..N-1 is mapped to all threads of the the calling thread team.
-   * This functionality requires C++11 support.*/
+/** \brief  Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all threads of the calling thread team.
+ * This functionality requires C++11 support.*/
 template<typename iType, class Lambda>
 KOKKOS_INLINE_FUNCTION
 void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, const Lambda& lambda) {
@@ -618,9 +612,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth
 
 #endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
 
-} // namespace Kokkos
-
-namespace Kokkos {
 /** \brief  Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
  *
  * The range i=0..N-1 is mapped to all vector lanes of the the calling thread.
@@ -707,10 +698,6 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Qth
   }
 }
 
-} // namespace Kokkos
-
-namespace Kokkos {
-
 template<class FunctorType>
 KOKKOS_INLINE_FUNCTION
 void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) {
@@ -740,6 +727,4 @@ void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& singl
 
 } // namespace Kokkos
 
-
 #endif /* #define KOKKOS_QTHREAD_PARALLEL_HPP */
-
diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
index 8cc39d277c1949dc7f9587c09b77d5a71ffddeba..e651b9fdb8cf8ad2f5067215bec6aff60a5c6adb 100644
--- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
+++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
@@ -58,7 +58,7 @@
 #include <Kokkos_Atomic.hpp>
 #include <Qthread/Kokkos_Qthread_TaskPolicy.hpp>
 
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_ENABLE_TASKDAG )
 
 //----------------------------------------------------------------------------
 
@@ -196,7 +196,7 @@ void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw )
   static const char msg_error_dependences[] = ": destroy task that has dependences" ;
   static const char msg_error_exception[]   = ": caught internal exception" ;
 
-  if ( rhs ) { Kokkos::atomic_fetch_add( & (*rhs).m_ref_count , 1 ); }
+  if ( rhs ) { Kokkos::atomic_increment( &(*rhs).m_ref_count ); }
 
   Task * const lhs_val = Kokkos::atomic_exchange( lhs , rhs );
 
@@ -486,6 +486,6 @@ void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy )
 } // namespace Experimental
 } // namespace Kokkos
 
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
 #endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
 
diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp
index 22a565503dd59626057bae12ef01cb9abdb994f9..565dbf7e61716717bdbac0e1b3adf007493cf27d 100644
--- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp
+++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp
@@ -43,8 +43,8 @@
 
 // Experimental unified task-data parallel manycore LDRD
 
-#ifndef KOKKOS_QTHREAD_TASKPOLICY_HPP
-#define KOKKOS_QTHREAD_TASKPOLICY_HPP
+#ifndef KOKKOS_QTHREAD_TASKSCHEDULER_HPP
+#define KOKKOS_QTHREAD_TASKSCHEDULER_HPP
 
 #include <string>
 #include <typeinfo>
@@ -64,12 +64,12 @@
 //----------------------------------------------------------------------------
 
 #include <Kokkos_Qthread.hpp>
-#include <Kokkos_TaskPolicy.hpp>
+#include <Kokkos_TaskScheduler.hpp>
 #include <Kokkos_View.hpp>
 
 #include <impl/Kokkos_FunctorAdapter.hpp>
 
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_ENABLE_TASKDAG )
 
 //----------------------------------------------------------------------------
 
@@ -154,7 +154,7 @@ public:
   KOKKOS_FUNCTION static
   TaskMember * verify_type( TaskMember * t )
     {
-      enum { check_type = ! Kokkos::Impl::is_same< ResultType , void >::value };
+      enum { check_type = ! std::is_same< ResultType , void >::value };
 
       if ( check_type && t != 0 ) {
 
@@ -298,7 +298,7 @@ public:
 
   template< class FunctorType , class ResultType >
   KOKKOS_INLINE_FUNCTION static
-  void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
+  void apply_single( typename std::enable_if< ! std::is_same< ResultType , void >::value , TaskMember * >::type t )
     {
       typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ;
 
@@ -314,7 +314,7 @@ public:
 
   template< class FunctorType , class ResultType >
   KOKKOS_INLINE_FUNCTION static
-  void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
+  void apply_single( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t )
     {
       typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ;
 
@@ -332,7 +332,7 @@ public:
 
   template< class FunctorType , class ResultType >
   KOKKOS_INLINE_FUNCTION static
-  void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t
+  void apply_team( typename std::enable_if< ! std::is_same< ResultType , void >::value , TaskMember * >::type t
                  , Kokkos::Impl::QthreadTeamPolicyMember & member )
     {
       typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ;
@@ -344,7 +344,7 @@ public:
 
   template< class FunctorType , class ResultType >
   KOKKOS_INLINE_FUNCTION static
-  void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t
+  void apply_team( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t
                  , Kokkos::Impl::QthreadTeamPolicyMember & member )
     {
       typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ;
@@ -575,10 +575,10 @@ public:
   template< class A1 , class A2 , class A3 , class A4 >
   void add_dependence( const Future<A1,A2> & after
                      , const Future<A3,A4> & before
-                     , typename Kokkos::Impl::enable_if
-                        < Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
+                     , typename std::enable_if
+                        < std::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
                           &&
-                          Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
+                          std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
                         >::type * = 0
                       )
     {
@@ -621,8 +621,8 @@ public:
   template< class FunctorType , class A3 , class A4 >
   void add_dependence( FunctorType * task_functor
                      , const Future<A3,A4> & before
-                     , typename Kokkos::Impl::enable_if
-                        < Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
+                     , typename std::enable_if
+                        < std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
                         >::type * = 0
                       )
     {
@@ -659,6 +659,6 @@ public:
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
 #endif /* #define KOKKOS_QTHREAD_TASK_HPP */
 
diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
index 5f0b8f70cd8ef36dd153b7bcbb84c42300f4fa6e..9f6e3d37b1b37298655652707980a035ba4eaa95 100644
--- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
+++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
@@ -714,12 +714,12 @@ void ThreadsExec::initialize( unsigned thread_count ,
   }
 
   // Check for over-subscription
-  if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) {
-    std::cout << "Kokkos::Threads::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl;
-    std::cout << "                                    Detected: " << Impl::processors_per_node() << " cores per node." << std::endl;
-    std::cout << "                                    Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." << std::endl;
-    std::cout << "                                    Requested: " << thread_count << " threads per process." << std::endl;
-  }
+  //if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) {
+  //  std::cout << "Kokkos::Threads::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl;
+  //  std::cout << "                                    Detected: " << Impl::processors_per_node() << " cores per node." << std::endl;
+  //  std::cout << "                                    Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." << std::endl;
+  //  std::cout << "                                    Requested: " << thread_count << " threads per process." << std::endl;
+  //}
 
   // Init the array for used for arbitrarily sized atomics
   Impl::init_lock_array_host_space();
diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
index 3407ffaa54149499d5046ae887a3b415627287b6..4256b0aa67f75125d7f4d6ef6d652f206e9fd847 100644
--- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
+++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
@@ -406,6 +406,8 @@ public:
           m_exec->barrier();
         }
       }
+      else
+      { m_invalid_thread = 1; }
     }
 
   ThreadsExecTeamMember()
@@ -460,7 +462,7 @@ public:
 
     if(m_league_chunk_end > m_league_size) m_league_chunk_end = m_league_size;
 
-    if(m_league_rank>=0)
+    if((m_league_rank>=0) && (m_league_rank < m_league_chunk_end))
       return true;
     return false;
   }
@@ -704,23 +706,22 @@ public:
 
 namespace Kokkos {
 
-template<typename iType>
+template< typename iType >
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>
-TeamThreadRange(const Impl::ThreadsExecTeamMember& thread, const iType& count)
+Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >
+TeamThreadRange( const Impl::ThreadsExecTeamMember& thread, const iType& count )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>(thread,count);
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, count );
 }
 
-template<typename iType>
+template< typename iType1, typename iType2 >
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>
-TeamThreadRange( const Impl::ThreadsExecTeamMember& thread
-               , const iType & begin
-               , const iType & end
-               )
+Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
+                                       Impl::ThreadsExecTeamMember>
+TeamThreadRange( const Impl::ThreadsExecTeamMember& thread, const iType1 & begin, const iType2 & end )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>(thread,begin,end);
+  typedef typename std::common_type< iType1, iType2 >::type iType;
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, iType(begin), iType(end) );
 }
 
 
diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
deleted file mode 100644
index e1599284b297bee7a770d2a6ce87a429a9e5d08a..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
+++ /dev/null
@@ -1,930 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-// Experimental unified task-data parallel manycore LDRD
-
-#include <stdio.h>
-#include <iostream>
-#include <sstream>
-#include <Kokkos_Core.hpp>
-#include <Threads/Kokkos_Threads_TaskPolicy.hpp>
-
-#if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY )
-
-#define QLOCK   (reinterpret_cast<void*>( ~((uintptr_t)0) ))
-#define QDENIED (reinterpret_cast<void*>( ~((uintptr_t)0) - 1 ))
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-void ThreadsTaskPolicyQueue::Destroy::destroy_shared_allocation()
-{
-  // Verify the queue is empty
-
-  if ( m_policy->m_count_ready ||
-       m_policy->m_team[0] ||
-       m_policy->m_team[1] ||
-       m_policy->m_team[2] ||
-       m_policy->m_serial[0] ||
-       m_policy->m_serial[1] ||
-       m_policy->m_serial[2] ) {
-    Kokkos::abort("ThreadsTaskPolicyQueue ERROR : Attempt to destroy non-empty queue" );
-  }
-
-  m_policy->~ThreadsTaskPolicyQueue();
-}
-
-//----------------------------------------------------------------------------
-
-ThreadsTaskPolicyQueue::~ThreadsTaskPolicyQueue()
-{
-}
-
-ThreadsTaskPolicyQueue::ThreadsTaskPolicyQueue
-  ( const unsigned arg_task_max_count
-  , const unsigned arg_task_max_size
-  , const unsigned arg_task_default_dependence_capacity
-  , const unsigned arg_task_team_size
-  )
-  : m_space( Kokkos::Threads::memory_space()
-           , arg_task_max_size * arg_task_max_count * 1.2
-           , 16 /* log2(superblock size) */
-           )
-  , m_team { 0 , 0 , 0 }
-  , m_serial { 0 , 0 , 0 }
-  , m_team_size( arg_task_team_size )
-  , m_default_dependence_capacity( arg_task_default_dependence_capacity )
-  , m_count_ready(0)
-  , m_count_alloc(0)
-{
-  const int threads_total    = Threads::thread_pool_size(0);
-  const int threads_per_numa = Threads::thread_pool_size(1);
-  const int threads_per_core = Threads::thread_pool_size(2);
-
-  if ( 0 == m_team_size ) {
-    // If a team task then claim for execution until count is zero
-    // Issue: team collectives cannot assume which pool members are in the team.
-    // Issue: team must only span a single NUMA region.
-
-    // If more than one thread per core then map cores to work team,
-    // else  map numa to work team.
-
-    if      ( 1 < threads_per_core ) m_team_size = threads_per_core ;
-    else if ( 1 < threads_per_numa ) m_team_size = threads_per_numa ;
-    else                             m_team_size = 1 ;
-  }
-
-  // Verify a valid team size
-  const bool valid_team_size =
-    ( 0 < m_team_size && m_team_size <= threads_total ) &&
-    (
-      ( 1                == m_team_size ) ||
-      ( threads_per_core == m_team_size ) ||
-      ( threads_per_numa == m_team_size )
-    );
-
-  if ( ! valid_team_size ) {
-    std::ostringstream msg ;
-
-    msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Threads > ERROR"
-        << " invalid team_size(" << m_team_size << ")"
-        << " threads_per_core(" << threads_per_core << ")"
-        << " threads_per_numa(" << threads_per_numa << ")"
-        << " threads_total(" << threads_total << ")"
-        ;
-
-    Kokkos::Impl::throw_runtime_exception( msg.str() );
-  }
-
-  Kokkos::memory_fence();
-}
-
-//----------------------------------------------------------------------------
-
-void ThreadsTaskPolicyQueue::driver( Kokkos::Impl::ThreadsExec & exec
-                                   , const void * arg )
-{
-  // Whole thread pool is calling this function
-
-  typedef Kokkos::Impl::ThreadsExecTeamMember member_type ;
-
-  ThreadsTaskPolicyQueue & self =
-    * reinterpret_cast< ThreadsTaskPolicyQueue * >( const_cast<void*>(arg) );
-
-  // Create the thread team member with shared memory for the given task.
-
-  const TeamPolicy< Kokkos::Threads > team_policy( 1 , self.m_team_size );
-
-  member_type team_member( & exec , team_policy , 0 );
-
-  Kokkos::Impl::ThreadsExec & exec_team_base =
-    team_member.threads_exec_team_base();
-
-  task_root_type * volatile * const task_team_ptr =
-    reinterpret_cast<task_root_type**>( exec_team_base.reduce_memory() );
-
-  volatile int * const work_team_ptr =
-    reinterpret_cast<volatile int*>( task_team_ptr + 1 );
-
-  // Each team must iterate this loop synchronously
-  // to insure team-execution of team-task.
-
-  const bool team_lead = team_member.team_fan_in();
-
-  bool work_team = true ;
-
-  while ( work_team ) {
-
-    task_root_type * task = 0 ;
-
-    // Start here with members in a fan_in state
-
-    if ( team_lead ) {
-      // Team lead queries the ready count for a team-consistent view.
-      *work_team_ptr = 0 != self.m_count_ready ;
-
-      // Only the team lead attempts to pop a team task from the queues
-      for ( int i = 0 ; i < int(NPRIORITY) && 0 == task ; ++i ) {
-        if ( ( i < 2 /* regular queue */ )
-             || ( ! self.m_space.is_empty() /* waiting for memory queue */ ) ) {
-          task = pop_ready_task( & self.m_team[i] );
-        }
-      }
-
-      *task_team_ptr = task ;
-    }
-
-    Kokkos::memory_fence();
-
-    team_member.team_fan_out();
-
-    work_team = *work_team_ptr ;
-
-    // Query if team acquired a team task
-
-    if ( 0 != ( task = *task_team_ptr ) ) {
-      // Set shared memory
-      team_member.set_league_shmem( 0 , 1 , task->m_shmem_size );
-
-      (*task->m_team)( task , team_member );
-
-      // The team task called the functor,
-      // called the team_fan_in(), and
-      // if completed the team lead destroyed the task functor.
-
-      if ( team_lead ) {
-        self.complete_executed_task( task );
-      }
-    }
-    else {
-      // No team task acquired, each thread try a serial task
-      // Try the priority queue, then the regular queue.
-      for ( int i = 0 ; i < int(NPRIORITY) && 0 == task ; ++i ) {
-        if ( ( i < 2 /* regular queue */ )
-             || ( ! self.m_space.is_empty() /* waiting for memory queue */ ) ) {
-          task = pop_ready_task( & self.m_serial[i] );
-        }
-      }
-
-      if ( 0 != task ) {
-
-        (*task->m_serial)( task );
-
-        self.complete_executed_task( task );
-      }
-
-      team_member.team_fan_in();
-    }
-  }
-
-  team_member.team_fan_out();
-
-  exec.fan_in();
-}
-
-//----------------------------------------------------------------------------
-
-ThreadsTaskPolicyQueue::task_root_type *
-ThreadsTaskPolicyQueue::pop_ready_task(
-  ThreadsTaskPolicyQueue::task_root_type * volatile * const queue )
-{
-  task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK);
-  task_root_type * task = 0 ;
-  task_root_type * const task_claim = *queue ;
-
-  if ( ( q_lock != task_claim ) && ( 0 != task_claim ) ) {
-
-    // Queue is not locked and not null, try to claim head of queue.
-    // Is a race among threads to claim the queue.
-
-    if ( task_claim == atomic_compare_exchange(queue,task_claim,q_lock) ) {
-
-      // Aquired the task which must be in the waiting state.
-
-      const int claim_state =
-        atomic_compare_exchange( & task_claim->m_state
-                               , int(TASK_STATE_WAITING)
-                               , int(TASK_STATE_EXECUTING) );
-
-      task_root_type * lock_verify = 0 ;
-
-      if ( claim_state == int(TASK_STATE_WAITING) ) {
-
-        // Transitioned this task from waiting to executing
-        // Update the queue to the next entry and release the lock
-
-        task_root_type * const next =
-          *((task_root_type * volatile *) & task_claim->m_next );
-
-        *((task_root_type * volatile *) & task_claim->m_next ) = 0 ;
-
-        lock_verify = atomic_compare_exchange( queue , q_lock , next );
-      }
-
-      if ( ( claim_state != int(TASK_STATE_WAITING) ) |
-           ( q_lock != lock_verify ) ) {
-
-        fprintf(stderr,"ThreadsTaskPolicyQueue::pop_ready_task(0x%lx) task(0x%lx) state(%d) ERROR %s\n"
-                      , (unsigned long) queue
-                      , (unsigned long) task
-                      , claim_state
-                      , ( claim_state != int(TASK_STATE_WAITING)
-                        ? "NOT WAITING"
-                        : "UNLOCK" ) );
-        fflush(stderr);
-        Kokkos::abort("ThreadsTaskPolicyQueue::pop_ready_task");
-      }
-
-      task = task_claim ;
-    }
-  }
-
-  return task ;
-}
-
-//----------------------------------------------------------------------------
-
-void ThreadsTaskPolicyQueue::complete_executed_task(
-  ThreadsTaskPolicyQueue::task_root_type * task )
-{
-  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
-
-  // State is either executing or if respawned then waiting,
-  // try to transition from executing to complete.
-  // Reads the current value.
-
-  const int state_old =
-    atomic_compare_exchange( & task->m_state
-                           , int(Kokkos::Experimental::TASK_STATE_EXECUTING)
-                           , int(Kokkos::Experimental::TASK_STATE_COMPLETE) );
-
-  if ( int(Kokkos::Experimental::TASK_STATE_WAITING) == state_old ) {
-    // Task requested a respawn so reschedule it.
-    // The reference count will be incremented if placed in a queue.
-    schedule_task( task , false /* not the initial spawn */ );
-  }
-  else if ( int(Kokkos::Experimental::TASK_STATE_EXECUTING) == state_old ) {
-    /* Task is complete */
-
-    // Clear dependences of this task before locking wait queue
-
-    task->clear_dependence();
-
-    // Stop other tasks from adding themselves to this task's wait queue.
-    // The wait queue is updated concurrently so guard with an atomic.
-
-    task_root_type * wait_queue     = *((task_root_type * volatile *) & task->m_wait );
-    task_root_type * wait_queue_old = 0 ;
-
-    do {
-      wait_queue_old = wait_queue ;
-      wait_queue     = atomic_compare_exchange( & task->m_wait , wait_queue_old , q_denied );
-    } while ( wait_queue_old != wait_queue );
-
-    // The task has been removed from ready queue and
-    // execution is complete so decrement the reference count.
-    // The reference count was incremented by the initial spawning.
-    // The task may be deleted if this was the last reference.
-    task_root_type::assign( & task , 0 );
-
-    // Pop waiting tasks and schedule them
-    while ( wait_queue ) {
-      task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ;
-      schedule_task( x , false /* not the initial spawn */ );
-    }
-  }
-  else {
-    fprintf( stderr
-           , "ThreadsTaskPolicyQueue::complete_executed_task(0x%lx) ERROR state_old(%d) dep_size(%d)\n"
-           , (unsigned long)( task )
-           , int(state_old)
-           , task->m_dep_size
-           );
-    fflush( stderr );
-    Kokkos::abort("ThreadsTaskPolicyQueue::complete_executed_task" );
-  }
-
-  // If the task was respawned it may have already been
-  // put in a ready queue and the count incremented.
-  // By decrementing the count last it will never go to zero
-  // with a ready or executing task.
-
-  atomic_fetch_add( & m_count_ready , -1 );
-}
-
-//----------------------------------------------------------------------------
-
-void ThreadsTaskPolicyQueue::reschedule_task(
-  ThreadsTaskPolicyQueue::task_root_type * const task )
-{
-  // Reschedule transitions from executing back to waiting.
-  const int old_state =
-    atomic_compare_exchange( & task->m_state
-                           , int(TASK_STATE_EXECUTING)
-                           , int(TASK_STATE_WAITING) );
-
-  if ( old_state != int(TASK_STATE_EXECUTING) ) {
-
-    fprintf( stderr
-           , "ThreadsTaskPolicyQueue::reschedule_task(0x%lx) ERROR state(%d)\n"
-           , (unsigned long) task
-           , old_state
-           );
-    fflush(stderr);
-    Kokkos::abort("ThreadsTaskPolicyQueue::reschedule" );
-  }
-}
-
-void ThreadsTaskPolicyQueue::schedule_task
-  ( ThreadsTaskPolicyQueue::task_root_type * const task 
-  , const bool initial_spawn )
-{
-  task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK);
-  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
-
-  //----------------------------------------
-  // State is either constructing or already waiting.
-  // If constructing then transition to waiting.
-
-  {
-    const int old_state = atomic_compare_exchange( & task->m_state
-                                                 , int(TASK_STATE_CONSTRUCTING)
-                                                 , int(TASK_STATE_WAITING) );
-
-    // Head of linked list of tasks waiting on this task
-    task_root_type * const waitTask =
-      *((task_root_type * volatile const *) & task->m_wait );
-
-    // Member of linked list of tasks waiting on some other task
-    task_root_type * const next =
-      *((task_root_type * volatile const *) & task->m_next );
-
-    // An incomplete and non-executing task has:
-    //   task->m_state == TASK_STATE_CONSTRUCTING or TASK_STATE_WAITING
-    //   task->m_wait  != q_denied
-    //   task->m_next  == 0
-    //
-    if ( ( q_denied == waitTask ) ||
-         ( 0 != next ) ||
-         ( old_state != int(TASK_STATE_CONSTRUCTING) &&
-           old_state != int(TASK_STATE_WAITING) ) ) {
-      fprintf(stderr,"ThreadsTaskPolicyQueue::schedule_task(0x%lx) STATE ERROR: state(%d) wait(0x%lx) next(0x%lx)\n"
-                    , (unsigned long) task
-                    , old_state
-                    , (unsigned long) waitTask
-                    , (unsigned long) next );
-      fflush(stderr);
-      Kokkos::abort("ThreadsTaskPolicyQueue::schedule" );
-    }
-  }
-
-  //----------------------------------------
-
-  if ( initial_spawn ) {
-    // The initial spawn of a task increments the reference count
-    // for the task's existence in either a waiting or ready queue
-    // until the task has completed.
-    // Completing the task's execution is the matching
-    // decrement of the reference count.
-
-    task_root_type::assign( 0 , task );
-  }
-
-  //----------------------------------------
-  // Insert this task into a dependence task that is not complete.
-  // Push on to that task's wait queue.
-
-  bool attempt_insert_in_queue = true ;
-
-  task_root_type * volatile * queue =
-    task->m_dep_size ? & task->m_dep[0]->m_wait : (task_root_type **) 0 ;
-
-  for ( int i = 0 ; attempt_insert_in_queue && ( 0 != queue ) ; ) {
-
-    task_root_type * const head_value_old = *queue ;
-
-    if ( q_denied == head_value_old ) {
-      // Wait queue is closed because task is complete,
-      // try again with the next dependence wait queue.
-      ++i ;
-      queue = i < task->m_dep_size ? & task->m_dep[i]->m_wait
-                                   : (task_root_type **) 0 ;
-    }
-    else {
-
-      // Wait queue is open and not denied.
-      // Have exclusive access to this task.
-      // Assign m_next assuming a successfull insertion into the queue.
-      // Fence the memory assignment before attempting the CAS.
-
-      *((task_root_type * volatile *) & task->m_next ) = head_value_old ;
-
-      memory_fence();
-
-      // Attempt to insert this task into the queue.
-      // If fails then continue the attempt.
-
-      attempt_insert_in_queue =
-        head_value_old != atomic_compare_exchange(queue,head_value_old,task);
-    }
-  }
-
-  //----------------------------------------
-  // All dependences are complete, insert into the ready list
-
-  if ( attempt_insert_in_queue ) {
-
-    // Increment the count of ready tasks.
-    // Count will be decremented when task is complete.
-
-    atomic_fetch_add( & m_count_ready , 1 );
-
-    queue = task->m_queue ;
-
-    while ( attempt_insert_in_queue ) {
-
-      // A locked queue is being popped.
-
-      task_root_type * const head_value_old = *queue ;
-
-      if ( q_lock != head_value_old ) {
-        // Read the head of ready queue,
-        // if same as previous value then CAS locks the ready queue
-
-        // Have exclusive access to this task,
-        // assign to head of queue, assuming successful insert
-        // Fence assignment before attempting insert.
-        *((task_root_type * volatile *) & task->m_next ) = head_value_old ;
-
-        memory_fence();
-
-        attempt_insert_in_queue =
-          head_value_old != atomic_compare_exchange(queue,head_value_old,task);
-      }
-    }
-  }
-}
-
-
-void TaskMember< Kokkos::Threads , void , void >::latch_add( const int k )
-{
-  typedef TaskMember< Kokkos::Threads , void , void > task_root_type ;
-
-  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
-  
-  const bool ok_input = 0 < k ;
-  
-  const int count = ok_input ? atomic_fetch_add( & m_dep_size , -k ) - k
-                             : k ;
-                           
-  const bool ok_count = 0 <= count ;
-  
-  const int state = 0 != count ? TASK_STATE_WAITING :
-    atomic_compare_exchange( & m_state
-                           , TASK_STATE_WAITING
-                           , TASK_STATE_COMPLETE );
-          
-  const bool ok_state = state == TASK_STATE_WAITING ;
-            
-  if ( ! ok_count || ! ok_state ) {
-    printf( "ThreadsTaskPolicyQueue::latch_add[0x%lx](%d) ERROR %s %d\n"
-          , (unsigned long) this
-          , k
-          , ( ! ok_input ? "Non-positive input" :
-            ( ! ok_count ? "Negative count" : "Bad State" ) )
-          , ( ! ok_input ? k :
-            ( ! ok_count ? count : state ) )
-          );
-    Kokkos::abort( "ThreadsTaskPolicyQueue::latch_add ERROR" );
-  } 
-  else if ( 0 == count ) {
-    // Stop other tasks from adding themselves to this latch's wait queue.
-    // The wait queue is updated concurrently so guard with an atomic.
-      
-    ThreadsTaskPolicyQueue & policy = *m_policy ; 
-    task_root_type * wait_queue     = *((task_root_type * volatile *) &m_wait);
-    task_root_type * wait_queue_old = 0 ;
-
-    do {
-      wait_queue_old = wait_queue ;
-      wait_queue     = atomic_compare_exchange( & m_wait , wait_queue_old , q_denied );
-    } while ( wait_queue_old != wait_queue );
-    
-    // Pop waiting tasks and schedule them
-    while ( wait_queue ) {
-      task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ;
-      policy.schedule_task( x , false /* not initial spawn */ );
-    }
-  }
-}
-
-//----------------------------------------------------------------------------
-
-void ThreadsTaskPolicyQueue::deallocate_task( void * ptr , unsigned size_alloc )
-{
-/*
-  const int n = atomic_fetch_add( & alloc_count , -1 ) - 1 ;
-
-  fprintf( stderr
-         , "ThreadsTaskPolicyQueue::deallocate_task(0x%lx,%d) count(%d)\n"
-         , (unsigned long) ptr
-         , size_alloc
-         , n
-         );
-  fflush( stderr );
-*/
-
-  m_space.deallocate( ptr , size_alloc );
-
-  Kokkos::atomic_decrement( & m_count_alloc );
-}
-
-ThreadsTaskPolicyQueue::task_root_type *
-ThreadsTaskPolicyQueue::allocate_task
-  ( const unsigned arg_sizeof_task
-  , const unsigned arg_dep_capacity
-  , const unsigned arg_team_shmem
-  )
-{ 
-  const unsigned base_size = arg_sizeof_task +
-    ( arg_sizeof_task % sizeof(task_root_type*)
-    ? sizeof(task_root_type*) - arg_sizeof_task % sizeof(task_root_type*)
-    : 0 );
-    
-  const unsigned dep_capacity
-    = ~0u == arg_dep_capacity
-    ? m_default_dependence_capacity
-    : arg_dep_capacity ;
-
-  const unsigned size_alloc =
-     base_size + sizeof(task_root_type*) * dep_capacity ;
-
-#if 0
-  // User created task memory pool with an estimate,
-  // if estimate is to low then report and throw exception.
-
-  if ( m_space.get_min_block_size() < size_alloc ) {
-    fprintf(stderr,"TaskPolicy<Threads> task allocation requires %d bytes on memory pool with %d byte chunk size\n"
-           , int(size_alloc)
-           , int(m_space.get_min_block_size())
-           );
-    fflush(stderr);
-    Kokkos::Impl::throw_runtime_exception("TaskMember< Threads >::task_allocate");
-  }
-#endif
-
-  task_root_type * const task =
-    reinterpret_cast<task_root_type*>( m_space.allocate( size_alloc ) );
-      
-  if ( task != 0 ) {
-        
-    // Initialize task's root and value data structure
-    // Calling function must copy construct the functor.
-        
-    new( (void*) task ) task_root_type();
-  
-    task->m_policy       = this ;
-    task->m_size_alloc   = size_alloc ;
-    task->m_dep_capacity = dep_capacity ;
-    task->m_shmem_size   = arg_team_shmem ;
-
-    if ( dep_capacity ) {
-      task->m_dep =
-        reinterpret_cast<task_root_type**>(
-        reinterpret_cast<unsigned char*>(task) + base_size );
-
-      for ( unsigned i = 0 ; i < dep_capacity ; ++i )
-        task->task_root_type::m_dep[i] = 0 ;
-    }
-
-    Kokkos::atomic_increment( & m_count_alloc );
-  }
-  return  task ;
-}
-
-
-//----------------------------------------------------------------------------
-
-void ThreadsTaskPolicyQueue::add_dependence
-  ( ThreadsTaskPolicyQueue::task_root_type * const after
-  , ThreadsTaskPolicyQueue::task_root_type * const before
-  )
-{
-  if ( ( after != 0 ) && ( before != 0 ) ) {
-
-    int const state = *((volatile const int *) & after->m_state );
-
-    // Only add dependence during construction or during execution.
-    // Both tasks must have the same policy.
-    // Dependence on non-full memory cannot be mixed with any other dependence.
-
-    const bool ok_state =
-      Kokkos::Experimental::TASK_STATE_CONSTRUCTING == state ||
-      Kokkos::Experimental::TASK_STATE_EXECUTING    == state ;
-
-    const bool ok_capacity =
-      after->m_dep_size < after->m_dep_capacity ;
-
-    const bool ok_policy =
-      after->m_policy == this && before->m_policy == this ;
-
-    if ( ok_state && ok_capacity && ok_policy ) {
-
-      ++after->m_dep_size ;
-
-      task_root_type::assign( after->m_dep + (after->m_dep_size-1) , before );
-
-      memory_fence();
-    }
-    else {
-
-fprintf( stderr
-       , "ThreadsTaskPolicyQueue::add_dependence( 0x%lx , 0x%lx ) ERROR %s\n"
-       , (unsigned long) after
-       , (unsigned long) before
-       , ( ! ok_state    ? "Task not constructing or executing" :
-         ( ! ok_capacity ? "Task Exceeded dependence capacity" 
-                         : "Tasks from different policies" 
-         )) );
-
-fflush( stderr );
-
-      Kokkos::abort("ThreadsTaskPolicyQueue::add_dependence ERROR");
-    }
-  }
-}
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-
-TaskPolicy< Kokkos::Threads >::TaskPolicy
-  ( const unsigned arg_task_max_count
-  , const unsigned arg_task_max_size // Application's task size
-  , const unsigned arg_task_default_dependence_capacity
-  , const unsigned arg_task_team_size
-  )
-  : m_track()
-  , m_policy(0)
-{
-  typedef Kokkos::Experimental::Impl::SharedAllocationRecord
-    < Kokkos::HostSpace , Impl::ThreadsTaskPolicyQueue::Destroy > record_type ;
-
-  record_type * record =
-    record_type::allocate( Kokkos::HostSpace()
-                         , "Threads task queue"
-                         , sizeof(Impl::ThreadsTaskPolicyQueue)
-                         );
-
-  m_policy =
-    reinterpret_cast< Impl::ThreadsTaskPolicyQueue * >( record->data() );
-
-  // Tasks are allocated with application's task size + sizeof(task_root_type)
-
-  const size_t full_task_size_estimate =
-    arg_task_max_size +
-    sizeof(task_root_type) +
-    sizeof(task_root_type*) * arg_task_default_dependence_capacity ;
-
-  new( m_policy )
-    Impl::ThreadsTaskPolicyQueue( arg_task_max_count
-                                , full_task_size_estimate
-                                , arg_task_default_dependence_capacity
-                                , arg_task_team_size );
-
-  record->m_destroy.m_policy = m_policy ;
-
-  m_track.assign_allocated_record_to_uninitialized( record );
-}
-
-
-TaskPolicy< Kokkos::Threads >::member_type &
-TaskPolicy< Kokkos::Threads >::member_single()
-{
-  static member_type s ;
-  return s ;
-}
-
-void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Threads > & policy )
-{
-  typedef Kokkos::Impl::ThreadsExecTeamMember member_type ;
-
-  enum { BASE_SHMEM = 1024 };
-
-  Kokkos::Impl::ThreadsExec::resize_scratch( 0 , member_type::team_reduce_size() + BASE_SHMEM );
-
-  Kokkos::Impl::ThreadsExec::start( & Impl::ThreadsTaskPolicyQueue::driver
-                                  , policy.m_policy );
-
-  Kokkos::Impl::ThreadsExec::fence();
-}
-
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-typedef TaskMember< Kokkos::Threads , void , void > Task ;
-
-//----------------------------------------------------------------------------
-
-Task::~TaskMember()
-{
-}
-
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-
-void Task::assign( Task ** const lhs_ptr , Task * rhs )
-{
-  Task * const q_denied = reinterpret_cast<Task*>(QDENIED);
-
-  // Increment rhs reference count.
-  if ( rhs ) { atomic_fetch_add( & rhs->m_ref_count , 1 ); }
-
-  if ( 0 == lhs_ptr ) return ;
-
-  // Must have exclusive access to *lhs_ptr.
-  // Assign the pointer and retrieve the previous value.
-
-#if 1
-
-  Task * const old_lhs = *lhs_ptr ;
-
-  *lhs_ptr = rhs ;
-
-#elif 0
-
-  Task * const old_lhs = *((Task*volatile*)lhs_ptr);
-
-  *((Task*volatile*)lhs_ptr) = rhs ;
-
-  Kokkos::memory_fence();
-
-#else
-
-  Task * const old_lhs = atomic_exchange( lhs_ptr , rhs );
-
-#endif
-
-  if ( old_lhs && rhs && old_lhs->m_policy != rhs->m_policy ) {
-    Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Threads>::assign ERROR different queues");
-  }
-
-  if ( old_lhs ) {
-
-    // Decrement former lhs reference count.
-    // If reference count is zero task must be complete, then delete task.
-    // Task is ready for deletion when  wait == q_denied
-    int const count = atomic_fetch_add( & (old_lhs->m_ref_count) , -1 ) - 1 ;
-    int const state = old_lhs->m_state ;
-    Task * const wait = *((Task * const volatile *) & old_lhs->m_wait );
-
-    const bool ok_count = 0 <= count ;
-
-    // If count == 0 then will be deleting
-    // and must either be constructing or complete.
-    const bool ok_state = 0 < count ? true :
-      ( ( state == int(TASK_STATE_CONSTRUCTING) && wait == 0 ) ||
-        ( state == int(TASK_STATE_COMPLETE)     && wait == q_denied ) )
-      &&
-     old_lhs->m_next == 0 &&
-     old_lhs->m_dep_size == 0 ;
-
-    if ( ! ok_count || ! ok_state ) {
-
-      fprintf( stderr , "Kokkos::Impl::TaskManager<Kokkos::Threads>::assign ERROR deleting task(0x%lx) m_ref_count(%d) , m_wait(0x%ld)\n"
-                      , (unsigned long) old_lhs
-                      , count
-                      , (unsigned long) wait );
-      fflush(stderr);
-      Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Threads>::assign ERROR deleting");
-    }
-
-    if ( count == 0 ) {
-      // When 'count == 0' this thread has exclusive access to 'old_lhs'
-
-      ThreadsTaskPolicyQueue & queue = *( old_lhs->m_policy );
-
-      queue.deallocate_task( old_lhs , old_lhs->m_size_alloc );
-    }
-  }
-}
-
-#endif
-
-//----------------------------------------------------------------------------
-
-Task * Task::get_dependence( int i ) const
-{
-  Task * const t = m_dep[i] ;
-
-  if ( Kokkos::Experimental::TASK_STATE_EXECUTING != m_state || i < 0 || m_dep_size <= i || 0 == t ) {
-
-fprintf( stderr
-       , "TaskMember< Threads >::get_dependence ERROR : task[%lx]{ state(%d) dep_size(%d) dep[%d] = %lx }\n"
-       , (unsigned long) this
-       , m_state
-       , m_dep_size
-       , i
-       , (unsigned long) t
-       );
-fflush( stderr );
-
-    Kokkos::Impl::throw_runtime_exception("TaskMember< Threads >::get_dependence ERROR");
-  }
-
-  return t ;
-}
-
-//----------------------------------------------------------------------------
-
-void Task::clear_dependence()
-{
-  for ( int i = m_dep_size - 1 ; 0 <= i ; --i ) {
-    assign( m_dep + i , 0 );
-  }
-
-  *((volatile int *) & m_dep_size ) = 0 ;
-
-  memory_fence();
-}
-
-//----------------------------------------------------------------------------
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
-
diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp
deleted file mode 100644
index 116d32e4fc4d6c6da2968518caacc133e7488ab4..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp
+++ /dev/null
@@ -1,745 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-// Experimental unified task-data parallel manycore LDRD
-
-#ifndef KOKKOS_THREADS_TASKPOLICY_HPP
-#define KOKKOS_THREADS_TASKPOLICY_HPP
-
-
-#include <Kokkos_Threads.hpp>
-#include <Kokkos_TaskPolicy.hpp>
-
-#if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY )
-
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-struct ThreadsTaskPolicyQueue ;
-
-/** \brief  Base class for all Kokkos::Threads tasks */
-template<>
-class TaskMember< Kokkos::Threads , void , void > {
-public:
-
-  template < class > friend class Kokkos::Experimental::TaskPolicy ;
-  friend struct ThreadsTaskPolicyQueue ;
-
-  typedef TaskMember * (* function_verify_type) ( TaskMember * );
-  typedef void         (* function_single_type) ( TaskMember * );
-  typedef void         (* function_team_type)   ( TaskMember * , Kokkos::Impl::ThreadsExecTeamMember & );
-
-private:
-
-
-  ThreadsTaskPolicyQueue * m_policy ;
-  TaskMember * volatile  * m_queue ;
-  function_verify_type     m_verify ;
-  function_team_type       m_team ;         ///< Apply function
-  function_single_type     m_serial ;       ///< Apply function
-  TaskMember **            m_dep ;          ///< Dependences
-  TaskMember *             m_wait ;         ///< Head of linked list of tasks waiting on this task
-  TaskMember *             m_next ;         ///< Member of linked list of tasks
-  int                      m_dep_capacity ; ///< Capacity of dependences
-  int                      m_dep_size ;     ///< Actual count of dependences
-  int                      m_size_alloc ;
-  int                      m_shmem_size ;
-  int                      m_ref_count ;    ///< Reference count
-  int                      m_state ;        ///< State of the task
-
-
-  TaskMember( TaskMember && ) = delete ;
-  TaskMember( const TaskMember & ) = delete ;
-  TaskMember & operator = ( TaskMember && ) = delete ;
-  TaskMember & operator = ( const TaskMember & ) = delete ;
-
-protected:
-
-  TaskMember()
-    : m_policy(0)
-    , m_verify(0)
-    , m_team(0)
-    , m_serial(0)
-    , m_dep(0)
-    , m_wait(0)
-    , m_next(0)
-    , m_dep_capacity(0)
-    , m_dep_size(0)
-    , m_size_alloc(0)
-    , m_shmem_size(0)
-    , m_ref_count(0)
-    , m_state( TASK_STATE_CONSTRUCTING )
-    {}
-
-public:
-
-  ~TaskMember();
-
-  KOKKOS_INLINE_FUNCTION
-  int reference_count() const
-    { return *((volatile int *) & m_ref_count ); }
-
-  template< typename ResultType >
-  KOKKOS_FUNCTION static
-  TaskMember * verify_type( TaskMember * t )
-    {
-      enum { check_type = ! std::is_same< ResultType , void >::value };
-
-      if ( check_type && t != 0 ) {
-
-        // Verify that t->m_verify is this function
-        const function_verify_type self = & TaskMember::template verify_type< ResultType > ;
-
-        if ( t->m_verify != self ) {
-          t = 0 ;
-          Kokkos::abort("TaskPolicy< Threads > verify_result_type" );
-        }
-      }
-      return t ;
-    }
-
-  //----------------------------------------
-  /*  Inheritence Requirements on task types:
-   *
-   *    class TaskMember< Threads , DerivedType::value_type , FunctorType >
-   *      : public TaskMember< Threads , DerivedType::value_type , void >
-   *      , public Functor
-   *      { ... };
-   *
-   *  If value_type != void
-   *    class TaskMember< Threads , value_type , void >
-   *      : public TaskMember< Threads , void , void >
-   *
-   */
-  //----------------------------------------
-
-  template< class DerivedTaskType , class Tag >
-  KOKKOS_FUNCTION static
-  void apply_single(
-    typename std::enable_if
-      <( std::is_same<Tag,void>::value &&
-         std::is_same< typename DerivedTaskType::result_type , void >::value
-       ), TaskMember * >::type t )
-    {
-      {
-        typedef typename DerivedTaskType::functor_type  functor_type ;
-
-        functor_type * const f = 
-          static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) );
-
-        f->apply();
-
-        if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
-          f->~functor_type();
-        }
-      }
-    }
-
-  template< class DerivedTaskType , class Tag >
-  KOKKOS_FUNCTION static
-  void apply_single(
-    typename std::enable_if
-      <( std::is_same< Tag , void >::value &&
-         ! std::is_same< typename DerivedTaskType::result_type , void >::value
-       ), TaskMember * >::type t )
-    {
-      {
-        typedef typename DerivedTaskType::functor_type  functor_type ;
-
-        DerivedTaskType * const self = static_cast< DerivedTaskType * >(t);
-        functor_type    * const f    = static_cast< functor_type * >( self );
-
-        f->apply( self->m_result );
-
-        if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
-          f->~functor_type();
-        }
-      }
-    }
-
-  //----------------------------------------
-
-  template< class DerivedTaskType , class Tag >
-  KOKKOS_FUNCTION static
-  void apply_team(
-    typename std::enable_if
-      <( std::is_same<Tag,void>::value &&
-         std::is_same<typename DerivedTaskType::result_type,void>::value
-       ), TaskMember * >::type t
-    , Kokkos::Impl::ThreadsExecTeamMember & member
-    )
-    {
-      typedef typename DerivedTaskType::functor_type  functor_type ;
-
-      functor_type * const f =
-        static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) );
-    
-      f->apply( member );
-
-      // Synchronize for possible functor destruction and
-      // completion of team task.
-      if ( member.team_fan_in() ) {
-        if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
-          f->~functor_type();
-        }
-      }
-    }
-
-  template< class DerivedTaskType , class Tag >
-  KOKKOS_FUNCTION static
-  void apply_team(
-    typename std::enable_if
-      <( std::is_same<Tag,void>::value &&
-         ! std::is_same<typename DerivedTaskType::result_type,void>::value
-       ), TaskMember * >::type t
-    , Kokkos::Impl::ThreadsExecTeamMember & member
-    )
-    {
-      typedef typename DerivedTaskType::functor_type  functor_type ;
-
-      DerivedTaskType * const self = static_cast< DerivedTaskType * >(t);
-      functor_type    * const f    = static_cast< functor_type * >( self );
-    
-      f->apply( member , self->m_result );
-
-      // Synchronize for possible functor destruction and
-      // completion of team task.
-      if ( member.team_fan_in() ) {
-        if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
-          f->~functor_type();
-        }
-      }
-    }
-
-  //----------------------------------------
-
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-  static
-  void assign( TaskMember ** const lhs , TaskMember * const rhs );
-#else
-  KOKKOS_INLINE_FUNCTION static
-  void assign( TaskMember ** const lhs , TaskMember * const rhs ) {}
-#endif
-
-  TaskMember * get_dependence( int i ) const ;
-
-  KOKKOS_INLINE_FUNCTION
-  int get_dependence() const { return m_dep_size ; }
-
-  void clear_dependence();
-
-  void latch_add( const int k );
-
-  //----------------------------------------
-
-  typedef FutureValueTypeIsVoidError get_result_type ;
-
-  KOKKOS_INLINE_FUNCTION
-  get_result_type get() const { return get_result_type() ; }
-
-  inline static
-  void construct_result( TaskMember * const ) {}
-
-  KOKKOS_INLINE_FUNCTION
-  Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); }
-
-};
-
-/** \brief  A Future< Kokkos::Threads , ResultType > will cast
- *          from  TaskMember< Kokkos::Threads , void , void >
- *          to    TaskMember< Kokkos::Threads , ResultType , void >
- *          to query the result.
- */
-template< class ResultType >
-class TaskMember< Kokkos::Threads , ResultType , void >
-  : public TaskMember< Kokkos::Threads , void , void >
-{
-public:
-
-  typedef ResultType result_type ;
-
-  result_type  m_result ;
-
-  typedef const result_type & get_result_type ;
-
-  KOKKOS_INLINE_FUNCTION
-  get_result_type get() const { return m_result ; }
-
-  inline static
-  void construct_result( TaskMember * const ptr )
-    { 
-      new((void*)(& ptr->m_result)) result_type();
-    }
-
-  inline
-  TaskMember() : TaskMember< Kokkos::Threads , void , void >(), m_result() {}
-
-  TaskMember( TaskMember && ) = delete ;
-  TaskMember( const TaskMember & ) = delete ;
-  TaskMember & operator = ( TaskMember && ) = delete ;
-  TaskMember & operator = ( const TaskMember & ) = delete ;
-};
-
-/** \brief  Callback functions will cast
- *          from  TaskMember< Kokkos::Threads , void , void >
- *          to    TaskMember< Kokkos::Threads , ResultType , FunctorType >
- *          to execute work functions.
- */
-template< class ResultType , class FunctorType >
-class TaskMember< Kokkos::Threads , ResultType , FunctorType >
-  : public TaskMember< Kokkos::Threads , ResultType , void >
-  , public FunctorType
-{
-public:
-  typedef ResultType   result_type ;
-  typedef FunctorType  functor_type ;
-
-  inline
-  TaskMember( const functor_type & arg_functor )
-    : TaskMember< Kokkos::Threads , ResultType , void >()
-    , functor_type( arg_functor )
-    {}
-
-  inline static
-  void copy_construct( TaskMember * const ptr
-                     , const functor_type & arg_functor )
-    {
-      typedef TaskMember< Kokkos::Threads , ResultType , void > base_type ;
-
-      new((void*)static_cast<FunctorType*>(ptr)) functor_type( arg_functor );
-
-      base_type::construct_result( static_cast<base_type*>( ptr ) );
-    }
-
-  TaskMember() = delete ;
-  TaskMember( TaskMember && ) = delete ;
-  TaskMember( const TaskMember & ) = delete ;
-  TaskMember & operator = ( TaskMember && ) = delete ;
-  TaskMember & operator = ( const TaskMember & ) = delete ;
-};
-
-//----------------------------------------------------------------------------
-
-struct ThreadsTaskPolicyQueue {
-
-  enum { NPRIORITY = 3 };
-
-  typedef Kokkos::Experimental::MemoryPool< Kokkos::Threads >
-    memory_space ;
-
-  typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Threads, void, void >
-    task_root_type ;
-
-  memory_space     m_space ;
-  task_root_type * m_team[ NPRIORITY ];
-  task_root_type * m_serial[ NPRIORITY ];
-  int              m_team_size ;    ///< Fixed size of a task-team
-  int              m_default_dependence_capacity ;
-  int     volatile m_count_ready ;  ///< Ready plus executing tasks
-  int     volatile m_count_alloc ;  ///< Total allocated tasks
-
-  // Execute tasks until all non-waiting tasks are complete.
-  static void driver( Kokkos::Impl::ThreadsExec & exec
-                    , const void * arg );
-
-  task_root_type * allocate_task
-   ( const unsigned arg_sizeof_task
-   , const unsigned arg_dep_capacity
-   , const unsigned arg_team_shmem
-   );
-
-  void deallocate_task( void * , unsigned );
-  void schedule_task( task_root_type * const
-                    , const bool initial_spawn = true );
-  void reschedule_task( task_root_type * const );
-  void add_dependence( task_root_type * const after
-                     , task_root_type * const before );
-
-  // When a task finishes executing update its dependences
-  // and either deallocate the task if complete
-  // or reschedule the task if respawned.
-  void complete_executed_task( task_root_type * );
-
-  // Pop a task from a ready queue
-  static task_root_type *
-    pop_ready_task( task_root_type * volatile * const queue );
-
-  ThreadsTaskPolicyQueue() = delete ;
-  ThreadsTaskPolicyQueue( ThreadsTaskPolicyQueue && ) = delete ;
-  ThreadsTaskPolicyQueue( const ThreadsTaskPolicyQueue & ) = delete ;
-  ThreadsTaskPolicyQueue & operator = ( ThreadsTaskPolicyQueue && ) = delete ;
-  ThreadsTaskPolicyQueue & operator = ( const ThreadsTaskPolicyQueue & ) = delete ;
-
-  ~ThreadsTaskPolicyQueue();
-
-  ThreadsTaskPolicyQueue
-    ( const unsigned arg_task_max_count
-    , const unsigned arg_task_max_size
-    , const unsigned arg_task_default_dependence_capacity
-    , const unsigned arg_task_team_size
-    );
-
-  // Callback to destroy the shared memory tracked queue.
-  struct Destroy {
-    ThreadsTaskPolicyQueue * m_policy ;
-    void destroy_shared_allocation();
-  };
-};
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-
-void wait( TaskPolicy< Kokkos::Threads > & );
-
-template<>
-class TaskPolicy< Kokkos::Threads >
-{
-public:
-
-  typedef Kokkos::Threads                      execution_space ;
-  typedef TaskPolicy                           execution_policy ;
-  typedef Kokkos::Impl::ThreadsExecTeamMember  member_type ;
-
-private:
-
-  typedef Impl::TaskMember< Kokkos::Threads , void , void >  task_root_type ;
-  typedef Kokkos::Experimental::MemoryPool< Kokkos::Threads > memory_space ;
-
-  typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
-
-  track_type                      m_track ;
-  Impl::ThreadsTaskPolicyQueue  * m_policy ;
-
-  template< class FunctorType >
-  static inline
-  const task_root_type * get_task_root( const FunctorType * f )
-    {
-      typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
-      return static_cast< const task_root_type * >( static_cast< const task_type * >(f) );
-    }
-
-  template< class FunctorType >
-  static inline
-  task_root_type * get_task_root( FunctorType * f )
-    {
-      typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
-      return static_cast< task_root_type * >( static_cast< task_type * >(f) );
-    }
-
-  /** \brief  Allocate and construct a task.
-   *
-   *  Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ]
-   */
-  template< class DerivedTaskType , class Tag >
-  task_root_type *
-  create( const typename DerivedTaskType::functor_type &  arg_functor
-        , const task_root_type::function_single_type      arg_apply_single
-        , const task_root_type::function_team_type        arg_apply_team
-        , const unsigned                                  arg_team_shmem
-        , const unsigned                                  arg_dependence_capacity
-        )
-    {
-      task_root_type * const t =
-        m_policy->allocate_task( sizeof(DerivedTaskType)
-                               , arg_dependence_capacity
-                               , arg_team_shmem
-                               );
-      if ( t != 0 ) {
-
-        DerivedTaskType * const task = static_cast<DerivedTaskType*>(t);
-
-        DerivedTaskType::copy_construct( task , arg_functor );
-
-        task->task_root_type::m_verify  = & task_root_type::template verify_type< typename DerivedTaskType::value_type > ;
-        task->task_root_type::m_team    = arg_apply_team ;
-        task->task_root_type::m_serial  = arg_apply_single ;
-
-        // Do not proceed until initialization is written to memory
-        Kokkos::memory_fence();
-      }
-      return t ;
-    }
-
-public:
-
-  // Valid team sizes are 1,
-  // Threads::pool_size(1) == threads per numa, or
-  // Threads::pool_size(2) == threads per core
-
-  TaskPolicy
-    ( const unsigned arg_task_max_count
-    , const unsigned arg_task_max_size
-    , const unsigned arg_task_default_dependence_capacity = 4
-    , const unsigned arg_task_team_size = 0 /* choose default */
-    );
-
-  KOKKOS_FUNCTION TaskPolicy() = default ;
-  KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ;
-  KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ;
-  KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
-  KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ;
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  int allocated_task_count() const { return m_policy->m_count_alloc ; }
-
-  //----------------------------------------
-  // Create serial-thread task
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< typename FunctorType::value_type , execution_space >
-  task_create( const FunctorType & functor
-             , const unsigned dependence_capacity = ~0u )
-    {
-      typedef typename FunctorType::value_type  value_type ;
-      typedef Impl::TaskMember< execution_space , value_type , FunctorType >  task_type ;
-
-      return Future< value_type , execution_space >(
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-        TaskPolicy::create< task_type , void >
-          ( functor
-          , & task_root_type::template apply_single< task_type , void >
-          , task_root_type::function_team_type(0)
-          , 0
-          , dependence_capacity
-          )
-#endif
-        );
-    }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< typename FunctorType::value_type , execution_space >
-  proc_create( const FunctorType & functor
-             , const unsigned dependence_capacity = ~0u )
-    { return task_create( functor , dependence_capacity ); }
-
-  // Create thread-team task
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< typename FunctorType::value_type , execution_space >
-  task_create_team( const FunctorType & functor
-                  , const unsigned dependence_capacity = ~0u )
-    {
-      typedef typename FunctorType::value_type  value_type ;
-      typedef Impl::TaskMember< execution_space , value_type , FunctorType >  task_type ;
-
-      return Future< value_type , execution_space >(
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-        TaskPolicy::create< task_type , void >
-          ( functor
-          , task_root_type::function_single_type(0)
-          , & task_root_type::template apply_team< task_type , void >
-          , Kokkos::Impl::FunctorTeamShmemSize< FunctorType >::
-              value( functor , m_policy->m_team_size )
-          , dependence_capacity
-          )
-#endif
-        );
-    }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< typename FunctorType::value_type , execution_space >
-  proc_create_team( const FunctorType & functor
-                  , const unsigned dependence_capacity = ~0u )
-    { return task_create_team( functor , dependence_capacity ); }
-
-  template< class A1 , class A2 , class A3 , class A4 >
-  KOKKOS_INLINE_FUNCTION
-  void add_dependence( const Future<A1,A2> & after
-                     , const Future<A3,A4> & before
-                     , typename std::enable_if
-                        < std::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
-                          &&
-                          std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
-                        >::type * = 0
-                      ) const
-    {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-      m_policy->add_dependence( after.m_task , before.m_task );
-#endif
-    }
-
-  //----------------------------------------
-
-  Future< Latch , execution_space >
-  KOKKOS_INLINE_FUNCTION
-  create_latch( const int N ) const
-    {
-      task_root_type * const task =
-        m_policy->allocate_task( sizeof(task_root_type) , 0 , 0 );
-      task->m_dep_size = N ; // Using m_dep_size for latch counter
-      task->m_state = TASK_STATE_WAITING ;
-      return Future< Latch , execution_space >( task );
-    }
-
-  //----------------------------------------
-
-  template< class FunctorType , class A3 , class A4 >
-  KOKKOS_INLINE_FUNCTION
-  void add_dependence( FunctorType * task_functor
-                     , const Future<A3,A4> & before
-                     , typename std::enable_if
-                        < std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
-                        >::type * = 0
-                      ) const
-    {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-      m_policy->add_dependence( get_task_root(task_functor) , before.m_task );
-#endif
-    }
-
-  template< class ValueType >
-  const Future< ValueType , execution_space > &
-    spawn( const Future< ValueType , execution_space > & f
-         , const bool priority = false ) const
-      {
-        if ( f.m_task ) {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-          f.m_task->m_queue =
-            ( f.m_task->m_team != 0
-            ? & ( m_policy->m_team[   priority ? 0 : 1 ] )
-            : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) );
-          m_policy->schedule_task( f.m_task );
-#endif
-        }
-        return f ;
-      }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  void respawn( FunctorType * task_functor 
-              , const bool priority = false ) const
-    {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-      task_root_type * const t = get_task_root(task_functor);
-      t->m_queue =
-        ( t->m_team != 0 ? & ( m_policy->m_team[   priority ? 0 : 1 ] )
-                         : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) );
-      m_policy->reschedule_task( t );
-#endif
-    }
-
-  // When a create method fails by returning a null Future
-  // the task that called the create method may respawn
-  // with a dependence on memory becoming available.
-  // This is a race as more than one task may be respawned
-  // with this need.
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  void respawn_needing_memory( FunctorType * task_functor ) const
-    {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-      task_root_type * const t = get_task_root(task_functor);
-      t->m_queue =
-        ( t->m_team != 0 ? & ( m_policy->m_team[   2 ] )
-                         : & ( m_policy->m_serial[ 2 ] ) );
-      m_policy->reschedule_task( t );
-#endif
-    }
-
-  //----------------------------------------
-  // Functions for an executing task functor to query dependences,
-  // set new dependences, and respawn itself.
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< void , execution_space >
-  get_dependence( const FunctorType * task_functor , int i ) const
-    {
-      return Future<void,execution_space>(
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-        get_task_root(task_functor)->get_dependence(i)
-#endif
-        );
-    }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  int get_dependence( const FunctorType * task_functor ) const
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-    { return get_task_root(task_functor)->get_dependence(); }
-#else
-    { return 0 ; }
-#endif
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  void clear_dependence( FunctorType * task_functor ) const
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-    { get_task_root(task_functor)->clear_dependence(); }
-#else
-    {}
-#endif
-
-  //----------------------------------------
-
-  static member_type & member_single();
-
-  friend void wait( TaskPolicy< Kokkos::Threads > & );
-};
-
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-
-#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
-#endif /* #ifndef KOKKOS_THREADS_TASKPOLICY_HPP */
-
-
diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp
index ed56536cd91b52f3d0beddc8095eba9a4bb593c9..d5d27cc8365c48ddd83077cc96511374f4658e89 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp
+++ b/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp
@@ -41,2892 +41,6 @@
 //@HEADER
 */
 
-#ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP
-#define KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP
-
-#include <type_traits>
-#include <initializer_list>
-
-#include <Kokkos_Core_fwd.hpp>
-#include <Kokkos_Pair.hpp>
-#include <Kokkos_Layout.hpp>
-#include <impl/Kokkos_Error.hpp>
-#include <impl/Kokkos_Traits.hpp>
-#include <impl/KokkosExp_ViewCtor.hpp>
-#include <impl/Kokkos_Atomic_View.hpp>
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-template< unsigned I , size_t ... Args >
-struct variadic_size_t
-  { enum { value = ~size_t(0) }; };
-
-template< size_t Val , size_t ... Args >
-struct variadic_size_t< 0 , Val , Args ... >
-  { enum { value = Val }; };
-
-template< unsigned I , size_t Val , size_t ... Args >
-struct variadic_size_t< I , Val , Args ... >
-  { enum { value = variadic_size_t< I - 1 , Args ... >::value }; };
-
-template< size_t ... Args >
-struct rank_dynamic ;
-
-template<>
-struct rank_dynamic<> { enum { value = 0 }; };
-
-template< size_t Val , size_t ... Args >
-struct rank_dynamic< Val , Args... >
-{
-  enum { value = ( Val == 0 ? 1 : 0 ) + rank_dynamic< Args... >::value };
-};
-
-#define KOKKOS_IMPL_VIEW_DIMENSION( R ) \
-  template< size_t V , unsigned > struct ViewDimension ## R \
-    { \
-      enum { ArgN ## R = ( V != ~size_t(0) ? V : 1 ) }; \
-      enum { N ## R = ( V != ~size_t(0) ? V : 1 ) }; \
-      KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t ) {} \
-      ViewDimension ## R () = default ; \
-      ViewDimension ## R ( const ViewDimension ## R  & ) = default ; \
-      ViewDimension ## R & operator = ( const ViewDimension ## R  & ) = default ; \
-    }; \
-  template< unsigned RD > struct ViewDimension ## R < 0 , RD > \
-    { \
-      enum { ArgN ## R = 0 }; \
-      typename std::conditional<( RD < 3 ), size_t , unsigned >::type N ## R ; \
-      ViewDimension ## R () = default ; \
-      ViewDimension ## R ( const ViewDimension ## R  & ) = default ; \
-      ViewDimension ## R & operator = ( const ViewDimension ## R  & ) = default ; \
-      KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t V ) : N ## R ( V ) {} \
-    };
-
-KOKKOS_IMPL_VIEW_DIMENSION( 0 )
-KOKKOS_IMPL_VIEW_DIMENSION( 1 )
-KOKKOS_IMPL_VIEW_DIMENSION( 2 )
-KOKKOS_IMPL_VIEW_DIMENSION( 3 )
-KOKKOS_IMPL_VIEW_DIMENSION( 4 )
-KOKKOS_IMPL_VIEW_DIMENSION( 5 )
-KOKKOS_IMPL_VIEW_DIMENSION( 6 )
-KOKKOS_IMPL_VIEW_DIMENSION( 7 )
-
-#undef KOKKOS_IMPL_VIEW_DIMENSION
-
-template< size_t ... Vals >
-struct ViewDimension
-  : public ViewDimension0< variadic_size_t<0,Vals...>::value 
-                         , rank_dynamic< Vals... >::value >
-  , public ViewDimension1< variadic_size_t<1,Vals...>::value 
-                         , rank_dynamic< Vals... >::value >
-  , public ViewDimension2< variadic_size_t<2,Vals...>::value 
-                         , rank_dynamic< Vals... >::value >
-  , public ViewDimension3< variadic_size_t<3,Vals...>::value 
-                         , rank_dynamic< Vals... >::value >
-  , public ViewDimension4< variadic_size_t<4,Vals...>::value 
-                         , rank_dynamic< Vals... >::value >
-  , public ViewDimension5< variadic_size_t<5,Vals...>::value 
-                         , rank_dynamic< Vals... >::value >
-  , public ViewDimension6< variadic_size_t<6,Vals...>::value 
-                         , rank_dynamic< Vals... >::value >
-  , public ViewDimension7< variadic_size_t<7,Vals...>::value 
-                         , rank_dynamic< Vals... >::value >
-{
-  typedef ViewDimension0< variadic_size_t<0,Vals...>::value 
-                        , rank_dynamic< Vals... >::value > D0 ;
-  typedef ViewDimension1< variadic_size_t<1,Vals...>::value 
-                        , rank_dynamic< Vals... >::value > D1 ;
-  typedef ViewDimension2< variadic_size_t<2,Vals...>::value 
-                        , rank_dynamic< Vals... >::value > D2 ;
-  typedef ViewDimension3< variadic_size_t<3,Vals...>::value 
-                        , rank_dynamic< Vals... >::value > D3 ;
-  typedef ViewDimension4< variadic_size_t<4,Vals...>::value 
-                        , rank_dynamic< Vals... >::value > D4 ;
-  typedef ViewDimension5< variadic_size_t<5,Vals...>::value 
-                        , rank_dynamic< Vals... >::value > D5 ;
-  typedef ViewDimension6< variadic_size_t<6,Vals...>::value 
-                        , rank_dynamic< Vals... >::value > D6 ;
-  typedef ViewDimension7< variadic_size_t<7,Vals...>::value 
-                        , rank_dynamic< Vals... >::value > D7 ;
-
-  using D0::ArgN0 ;
-  using D1::ArgN1 ;
-  using D2::ArgN2 ;
-  using D3::ArgN3 ;
-  using D4::ArgN4 ;
-  using D5::ArgN5 ;
-  using D6::ArgN6 ;
-  using D7::ArgN7 ;
-
-  using D0::N0 ;
-  using D1::N1 ;
-  using D2::N2 ;
-  using D3::N3 ;
-  using D4::N4 ;
-  using D5::N5 ;
-  using D6::N6 ;
-  using D7::N7 ;
-
-  enum { rank = sizeof...(Vals) };
-  enum { rank_dynamic = Impl::rank_dynamic< Vals... >::value };
-
-  ViewDimension() = default ;
-  ViewDimension( const ViewDimension & ) = default ;
-  ViewDimension & operator = ( const ViewDimension & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr
-  ViewDimension( size_t n0 , size_t n1 , size_t n2 , size_t n3
-               , size_t n4 , size_t n5 , size_t n6 , size_t n7 )
-    : D0( n0 )
-    , D1( n1 )
-    , D2( n2 )
-    , D3( n3 )
-    , D4( n4 )
-    , D5( n5 )
-    , D6( n6 )
-    , D7( n7 )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_t extent( const unsigned r ) const
-    {
-      return r == 0 ? N0 : (
-             r == 1 ? N1 : (
-             r == 2 ? N2 : (
-             r == 3 ? N3 : (
-             r == 4 ? N4 : (
-             r == 5 ? N5 : (
-             r == 6 ? N6 : (
-             r == 7 ? N7 : 0 )))))));
-    }
-
-  template< size_t N >
-  struct prepend { typedef ViewDimension< N , Vals... > type ; };
-
-  template< size_t N >
-  struct append { typedef ViewDimension< Vals... , N > type ; };
-};
-
-template< class A , class B >
-struct ViewDimensionJoin ;
-
-template< size_t ... A , size_t ... B >
-struct ViewDimensionJoin< ViewDimension< A... > , ViewDimension< B... > > {
-  typedef ViewDimension< A... , B... > type ;
-};
-
-//----------------------------------------------------------------------------
-
-template< class DstDim , class SrcDim >
-struct ViewDimensionAssignable ;
-
-template< size_t ... DstArgs , size_t ... SrcArgs >
-struct ViewDimensionAssignable< ViewDimension< DstArgs ... >
-                              , ViewDimension< SrcArgs ... > >
-{
-  typedef ViewDimension< DstArgs... > dst ;
-  typedef ViewDimension< SrcArgs... > src ;
-
-  enum { value =
-    unsigned(dst::rank) == unsigned(src::rank) && (
-      //Compile time check that potential static dimensions match
-      ( ( 1 > dst::rank_dynamic && 1 > src::rank_dynamic ) ? (size_t(dst::ArgN0) == size_t(src::ArgN0)) : true ) &&
-      ( ( 2 > dst::rank_dynamic && 2 > src::rank_dynamic ) ? (size_t(dst::ArgN1) == size_t(src::ArgN1)) : true ) &&
-      ( ( 3 > dst::rank_dynamic && 3 > src::rank_dynamic ) ? (size_t(dst::ArgN2) == size_t(src::ArgN2)) : true ) &&
-      ( ( 4 > dst::rank_dynamic && 4 > src::rank_dynamic ) ? (size_t(dst::ArgN3) == size_t(src::ArgN3)) : true ) &&
-      ( ( 5 > dst::rank_dynamic && 5 > src::rank_dynamic ) ? (size_t(dst::ArgN4) == size_t(src::ArgN4)) : true ) &&
-      ( ( 6 > dst::rank_dynamic && 6 > src::rank_dynamic ) ? (size_t(dst::ArgN5) == size_t(src::ArgN5)) : true ) &&
-      ( ( 7 > dst::rank_dynamic && 7 > src::rank_dynamic ) ? (size_t(dst::ArgN6) == size_t(src::ArgN6)) : true ) &&
-      ( ( 8 > dst::rank_dynamic && 8 > src::rank_dynamic ) ? (size_t(dst::ArgN7) == size_t(src::ArgN7)) : true )
-    )};
-
-};
-
-}}} // namespace Kokkos::Experimental::Impl
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-struct ALL_t {
-  KOKKOS_INLINE_FUNCTION
-  constexpr const ALL_t & operator()() const { return *this ; }
-};
-
-template< class T >
-struct is_integral_extent_type
-{ enum { value = std::is_same<T,Kokkos::Experimental::Impl::ALL_t>::value ? 1 : 0 }; };
-
-template< class iType >
-struct is_integral_extent_type< std::pair<iType,iType> >
-{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; };
-
-template< class iType >
-struct is_integral_extent_type< Kokkos::pair<iType,iType> >
-{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; };
-
-// Assuming '2 == initializer_list<iType>::size()'
-template< class iType >
-struct is_integral_extent_type< std::initializer_list<iType> >
-{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; };
-
-template < unsigned I , class ... Args >
-struct is_integral_extent
-{
-  // get_type is void when sizeof...(Args) <= I
-  typedef typename std::remove_cv<
-          typename std::remove_reference<
-          typename Kokkos::Impl::get_type<I,Args...
-          >::type >::type >::type type ;
-
-  enum { value = is_integral_extent_type<type>::value };
-
-  static_assert( value ||
-                 std::is_integral<type>::value ||
-                 std::is_same<type,void>::value 
-               , "subview argument must be either integral or integral extent" );
-};
-
-template< unsigned DomainRank , unsigned RangeRank >
-struct SubviewExtents {
-private:
-
-  // Cannot declare zero-length arrays
-  enum { InternalRangeRank = RangeRank ? RangeRank : 1u };
-
-  size_t   m_begin[  DomainRank ];
-  size_t   m_length[ InternalRangeRank ];
-  unsigned m_index[  InternalRangeRank ];
-
-  template< size_t ... DimArgs >
-  KOKKOS_FORCEINLINE_FUNCTION
-  bool set( unsigned domain_rank
-          , unsigned range_rank
-          , const ViewDimension< DimArgs ... > & dim )
-    { return true ; }
-
-  template< class T , size_t ... DimArgs , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  bool set( unsigned domain_rank
-          , unsigned range_rank
-          , const ViewDimension< DimArgs ... > & dim
-          , const T & val
-          , Args ... args )
-    {
-      const size_t v = static_cast<size_t>(val);
-
-      m_begin[ domain_rank ] = v ;
-
-      return set( domain_rank + 1 , range_rank , dim , args... )
-#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
-             && ( v < dim.extent( domain_rank ) )
-#endif
-      ;
-    }
-
-  // ALL_t
-  template< size_t ... DimArgs , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  bool set( unsigned domain_rank
-          , unsigned range_rank
-          , const ViewDimension< DimArgs ... > & dim
-          , const Kokkos::Experimental::Impl::ALL_t 
-          , Args ... args )
-    {
-      m_begin[  domain_rank ] = 0 ;
-      m_length[ range_rank  ] = dim.extent( domain_rank );
-      m_index[  range_rank  ] = domain_rank ;
-
-      return set( domain_rank + 1 , range_rank + 1 , dim , args... );
-    }
-
-  // std::pair range
-  template< class T , size_t ... DimArgs , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  bool set( unsigned domain_rank
-          , unsigned range_rank
-          , const ViewDimension< DimArgs ... > & dim
-          , const std::pair<T,T> & val
-          , Args ... args )
-    {
-      const size_t b = static_cast<size_t>( val.first );
-      const size_t e = static_cast<size_t>( val.second );
-
-      m_begin[  domain_rank ] = b ;
-      m_length[ range_rank  ] = e - b ;
-      m_index[  range_rank  ] = domain_rank ;
-
-      return set( domain_rank + 1 , range_rank + 1 , dim , args... )
-#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
-             && ( e <= b + dim.extent( domain_rank ) )
-#endif
-      ;
-    }
-
-  // Kokkos::pair range
-  template< class T , size_t ... DimArgs , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  bool set( unsigned domain_rank
-          , unsigned range_rank
-          , const ViewDimension< DimArgs ... > & dim
-          , const Kokkos::pair<T,T> & val
-          , Args ... args )
-    {
-      const size_t b = static_cast<size_t>( val.first );
-      const size_t e = static_cast<size_t>( val.second );
-
-      m_begin[  domain_rank ] = b ;
-      m_length[ range_rank  ] = e - b ;
-      m_index[  range_rank  ] = domain_rank ;
-
-      return set( domain_rank + 1 , range_rank + 1 , dim , args... )
-#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
-             && ( e <= b + dim.extent( domain_rank ) )
-#endif
-      ;
-    }
-
-  // { begin , end } range
-  template< class T , size_t ... DimArgs , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  bool set( unsigned domain_rank
-          , unsigned range_rank
-          , const ViewDimension< DimArgs ... > & dim
-          , const std::initializer_list< T > & val
-          , Args ... args )
-    {
-      const size_t b = static_cast<size_t>( val.begin()[0] );
-      const size_t e = static_cast<size_t>( val.begin()[1] );
-
-      m_begin[  domain_rank ] = b ;
-      m_length[ range_rank  ] = e - b ;
-      m_index[  range_rank  ] = domain_rank ;
-
-      return set( domain_rank + 1 , range_rank + 1 , dim , args... )
-#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
-             && ( val.size() == 2 )
-             && ( e <= b + dim.extent( domain_rank ) )
-#endif
-      ;
-    }
-
-  //------------------------------
-
-#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
-
-  template< size_t ... DimArgs >
-  void error( char *
-            , int
-            , unsigned
-            , unsigned
-            , const ViewDimension< DimArgs ... > & ) const
-    {}
-
-  template< class T , size_t ... DimArgs , class ... Args >
-  void error( char * buf , int buf_len
-            , unsigned domain_rank
-            , unsigned range_rank
-            , const ViewDimension< DimArgs ... > & dim
-            , const T & val
-            , Args ... args ) const
-    {
-      const int n = std::min( buf_len ,
-        snprintf( buf , buf_len
-                , " %lu < %lu %c"
-                , static_cast<unsigned long>(val)
-                , static_cast<unsigned long>( dim.extent( domain_rank ) )
-                , int( sizeof...(Args) ? ',' : ')' ) ) );
-
-      error( buf+n, buf_len-n, domain_rank + 1 , range_rank , dim , args... );
-    }
-
-  // std::pair range
-  template< size_t ... DimArgs , class ... Args >
-  void error( char * buf , int buf_len
-            , unsigned domain_rank
-            , unsigned range_rank
-            , const ViewDimension< DimArgs ... > & dim
-            , const Kokkos::Experimental::Impl::ALL_t 
-            , Args ... args ) const
-    {
-      const int n = std::min( buf_len ,
-        snprintf( buf , buf_len
-                , " Kokkos::ALL %c" 
-                , int( sizeof...(Args) ? ',' : ')' ) ) );
-
-      error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
-    }
-
-  // std::pair range
-  template< class T , size_t ... DimArgs , class ... Args >
-  void error( char * buf , int buf_len
-            , unsigned domain_rank
-            , unsigned range_rank
-            , const ViewDimension< DimArgs ... > & dim
-            , const std::pair<T,T> & val
-            , Args ... args ) const
-    {
-      // d <= e - b
-      const int n = std::min( buf_len ,
-        snprintf( buf , buf_len
-                , " %lu <= %lu - %lu %c"
-                , static_cast<unsigned long>( dim.extent( domain_rank ) )
-                , static_cast<unsigned long>( val.second )
-                , static_cast<unsigned long>( val.begin )
-                , int( sizeof...(Args) ? ',' : ')' ) ) );
-
-      error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
-    }
-
-  // Kokkos::pair range
-  template< class T , size_t ... DimArgs , class ... Args >
-  void error( char * buf , int buf_len
-            , unsigned domain_rank
-            , unsigned range_rank
-            , const ViewDimension< DimArgs ... > & dim
-            , const Kokkos::pair<T,T> & val
-            , Args ... args ) const
-    {
-      // d <= e - b
-      const int n = std::min( buf_len ,
-        snprintf( buf , buf_len
-                , " %lu <= %lu - %lu %c"
-                , static_cast<unsigned long>( dim.extent( domain_rank ) )
-                , static_cast<unsigned long>( val.second )
-                , static_cast<unsigned long>( val.begin )
-                , int( sizeof...(Args) ? ',' : ')' ) ) );
-
-      error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
-    }
-
-  // { begin , end } range
-  template< class T , size_t ... DimArgs , class ... Args >
-  void error( char * buf , int buf_len
-            , unsigned domain_rank
-            , unsigned range_rank
-            , const ViewDimension< DimArgs ... > & dim
-            , const std::initializer_list< T > & val
-            , Args ... args ) const
-    {
-      // d <= e - b
-      int n = 0 ;
-      if ( val.size() == 2 ) {
-        n = std::min( buf_len ,
-          snprintf( buf , buf_len
-                  , " %lu <= %lu - %lu %c"
-                  , static_cast<unsigned long>( dim.extent( domain_rank ) )
-                  , static_cast<unsigned long>( val.begin()[0] )
-                  , static_cast<unsigned long>( val.begin()[1] )
-                  , int( sizeof...(Args) ? ',' : ')' ) ) );
-      }
-      else {
-        n = std::min( buf_len ,
-          snprintf( buf , buf_len
-                  , " { ... }.size() == %u %c"
-                  , unsigned(val.size())
-                  , int( sizeof...(Args) ? ',' : ')' ) ) );
-      }
-
-      error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
-    }
-
-  template< size_t ... DimArgs , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  void error( const ViewDimension< DimArgs ... > & dim , Args ... args ) const
-    {
-#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST )
-      enum { LEN = 1024 };
-      char buffer[ LEN ];
-
-      const int n = snprintf(buffer,LEN,"Kokkos::subview bounds error (");
-      error( buffer+n , LEN-n , 0 , 0 , dim , args... );
-
-      Kokkos::Impl::throw_runtime_exception(std::string(buffer));
-#else
-      Kokkos::abort("Kokkos::subview bounds error");
-#endif
-    }
-
-#else
-
-  template< size_t ... DimArgs , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  void error( const ViewDimension< DimArgs ... > & , Args ... ) const {}
-
-#endif
-
-public:
-
-  template< size_t ... DimArgs , class ... Args >
-  KOKKOS_INLINE_FUNCTION
-  SubviewExtents( const ViewDimension< DimArgs ... > & dim , Args ... args )
-    {
-      static_assert( DomainRank == sizeof...(DimArgs) , "" );
-      static_assert( DomainRank == sizeof...(Args) , "" );
-
-      // Verifies that all arguments, up to 8, are integral types,
-      // integral extents, or don't exist.
-      static_assert( RangeRank ==
-        unsigned( is_integral_extent<0,Args...>::value ) +
-        unsigned( is_integral_extent<1,Args...>::value ) +
-        unsigned( is_integral_extent<2,Args...>::value ) +
-        unsigned( is_integral_extent<3,Args...>::value ) +
-        unsigned( is_integral_extent<4,Args...>::value ) +
-        unsigned( is_integral_extent<5,Args...>::value ) +
-        unsigned( is_integral_extent<6,Args...>::value ) +
-        unsigned( is_integral_extent<7,Args...>::value ) , "" );
-
-      if ( RangeRank == 0 ) { m_length[0] = 0 ; m_index[0] = ~0u ; }
-
-      if ( ! set( 0 , 0 , dim , args... ) ) error( dim , args... );
-    }
-
-  template < typename iType >
-  KOKKOS_FORCEINLINE_FUNCTION
-  constexpr size_t domain_offset( const iType i ) const
-    { return unsigned(i) < DomainRank ? m_begin[i] : 0 ; }
-
-  template < typename iType >
-  KOKKOS_FORCEINLINE_FUNCTION
-  constexpr size_t range_extent( const iType i ) const
-    { return unsigned(i) < InternalRangeRank ? m_length[i] : 0 ; }
-
-  template < typename iType >
-  KOKKOS_FORCEINLINE_FUNCTION
-  constexpr unsigned range_index( const iType i ) const
-    { return unsigned(i) < InternalRangeRank ? m_index[i] : ~0u ; }
-};
-
-}}} // namespace Kokkos::Experimental::Impl
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-/** \brief  Given a value type and dimension generate the View data type */
-template< class T , class Dim >
-struct ViewDataType ;
-
-template< class T >
-struct ViewDataType< T , ViewDimension<> >
-{
-  typedef T type ;
-};
-
-template< class T , size_t ... Args >
-struct ViewDataType< T , ViewDimension< 0 , Args... > >
-{
-  typedef typename ViewDataType<T*,ViewDimension<Args...> >::type type ;
-};
-
-template< class T , size_t N , size_t ... Args >
-struct ViewDataType< T , ViewDimension< N , Args... > >
-{
-  typedef typename ViewDataType<T,ViewDimension<Args...> >::type type[N] ;
-};
-
-/**\brief  Analysis of View data type.
- *
- *  Data type conforms to one of the following patterns :
- *    {const} value_type [][#][#][#]
- *    {const} value_type ***[#][#][#]
- *  Where the sum of counts of '*' and '[#]' is at most ten.
- *
- *  Provide typedef for the ViewDimension<...> and value_type.
- */
-template< class T >
-struct ViewArrayAnalysis 
-{
-  typedef T                                      value_type ;
-  typedef typename std::add_const<    T >::type  const_value_type ;
-  typedef typename std::remove_const< T >::type  non_const_value_type ;
-  typedef ViewDimension<>                        static_dimension ;
-  typedef ViewDimension<>                        dynamic_dimension ;
-  typedef ViewDimension<>                        dimension ;
-};
-
-template< class T , size_t N >
-struct ViewArrayAnalysis< T[N] >
-{
-private:
-  typedef ViewArrayAnalysis< T > nested ;
-public:
-  typedef typename nested::value_type            value_type ;
-  typedef typename nested::const_value_type      const_value_type ;
-  typedef typename nested::non_const_value_type  non_const_value_type ;
-
-  typedef typename nested::static_dimension::template prepend<N>::type
-    static_dimension ;
-
-  typedef typename nested::dynamic_dimension dynamic_dimension ;
-
-  typedef typename
-    ViewDimensionJoin< dynamic_dimension , static_dimension >::type
-      dimension ;
-};
-
-template< class T >
-struct ViewArrayAnalysis< T[] >
-{
-private:
-  typedef ViewArrayAnalysis< T > nested ;
-  typedef typename nested::dimension nested_dimension ;
-public:
-  typedef typename nested::value_type            value_type ;
-  typedef typename nested::const_value_type      const_value_type ;
-  typedef typename nested::non_const_value_type  non_const_value_type ;
-
-  typedef typename nested::dynamic_dimension::template prepend<0>::type
-    dynamic_dimension ;
-
-  typedef typename nested::static_dimension static_dimension ;
-
-  typedef typename
-    ViewDimensionJoin< dynamic_dimension , static_dimension >::type
-      dimension ;
-};
-
-template< class T >
-struct ViewArrayAnalysis< T* >
-{
-private:
-  typedef ViewArrayAnalysis< T > nested ;
-public:
-  typedef typename nested::value_type            value_type ;
-  typedef typename nested::const_value_type      const_value_type ;
-  typedef typename nested::non_const_value_type  non_const_value_type ;
-
-  typedef typename nested::dynamic_dimension::template prepend<0>::type
-    dynamic_dimension ;
-
-  typedef typename nested::static_dimension static_dimension ;
-
-  typedef typename
-    ViewDimensionJoin< dynamic_dimension , static_dimension >::type
-      dimension ;
-};
-
-
-template< class DataType , class ArrayLayout , class ValueType >
-struct ViewDataAnalysis
-{
-private:
-
-  typedef ViewArrayAnalysis< DataType > array_analysis ;
-
-  // ValueType is opportunity for partial specialization.
-  // Must match array analysis when this default template is used.
-  static_assert( std::is_same< ValueType , typename array_analysis::non_const_value_type >::value , "" );
-
-public:
-
-  typedef void specialize ; // No specialization
-
-  typedef typename array_analysis::dimension             dimension ;
-  typedef typename array_analysis::value_type            value_type ;
-  typedef typename array_analysis::const_value_type      const_value_type ;
-  typedef typename array_analysis::non_const_value_type  non_const_value_type ;
-
-  // Generate analogous multidimensional array specification type.
-  typedef typename ViewDataType<           value_type , dimension >::type  type ;
-  typedef typename ViewDataType<     const_value_type , dimension >::type  const_type ;
-  typedef typename ViewDataType< non_const_value_type , dimension >::type  non_const_type ;
-
-  // Generate "flattened" multidimensional array specification type.
-  typedef type            scalar_array_type ;
-  typedef const_type      const_scalar_array_type ;
-  typedef non_const_type  non_const_scalar_array_type ;
-};
-
-}}} // namespace Kokkos::Experimental::Impl
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-template < class Dimension , class Layout , typename Enable = void >
-struct ViewOffset {
-  using is_mapping_plugin = std::false_type ;
-};
-
-//----------------------------------------------------------------------------
-// LayoutLeft AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding
-template < class Dimension >
-struct ViewOffset< Dimension , Kokkos::LayoutLeft
-                 , typename std::enable_if<( 1 >= Dimension::rank
-                                             ||
-                                             0 == Dimension::rank_dynamic
-                                           )>::type >
-{
-  using is_mapping_plugin = std::true_type ;
-  using is_regular        = std::true_type ;
-
-  typedef size_t             size_type ;
-  typedef Dimension          dimension_type ;
-  typedef Kokkos::LayoutLeft array_layout ;
-
-  dimension_type m_dim ;
-
-  //----------------------------------------
-
-  // rank 1
-  template< typename I0 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 ) const { return i0 ; }
-
-  // rank 2
-  template < typename I0 , typename I1 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 , I1 const & i1 ) const
-    { return i0 + m_dim.N0 * i1 ; }
-
-  //rank 3
-  template < typename I0, typename I1, typename I2 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
-  {
-    return i0 + m_dim.N0 * ( i1 + m_dim.N1 * i2 );
-  }
-
-  //rank 4
-  template < typename I0, typename I1, typename I2, typename I3 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
-  {
-    return i0 + m_dim.N0 * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * i3 ));
-  }
-
-  //rank 5
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4 ) const
-  {
-    return i0 + m_dim.N0 * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * (
-           i3 + m_dim.N3 * i4 )));
-  }
-
-  //rank 6
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5 ) const
-  {
-    return i0 + m_dim.N0 * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * (
-           i3 + m_dim.N3 * (
-           i4 + m_dim.N4 * i5 ))));
-  }
-
-  //rank 7
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
-  {
-    return i0 + m_dim.N0 * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * (
-           i3 + m_dim.N3 * (
-           i4 + m_dim.N4 * (
-           i5 + m_dim.N5 * i6 )))));
-  }
-
-  //rank 8
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
-  {
-    return i0 + m_dim.N0 * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * (
-           i3 + m_dim.N3 * (
-           i4 + m_dim.N4 * (
-           i5 + m_dim.N5 * (
-           i6 + m_dim.N6 * i7 ))))));
-  }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr array_layout layout() const
-    {
-      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
-                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
-    }
-
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
-
-  /* Cardinality of the domain index space */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type size() const
-    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
-
-  /* Span of the range space */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type span() const
-    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
-
-  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; }
-
-  /* Strides of dimensions */
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N0 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N0 * m_dim.N1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; }
-
-  // Stride with [ rank ] value is the total length
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    {
-      s[0] = 1 ;
-      if ( 0 < dimension_type::rank ) { s[1] = m_dim.N0 ; }
-      if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; }
-      if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; }
-      if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; }
-      if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; }
-      if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; }
-      if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; }
-      if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; }
-    }
-
-  //----------------------------------------
-
-  ViewOffset() = default ;
-  ViewOffset( const ViewOffset & ) = default ;
-  ViewOffset & operator = ( const ViewOffset & ) = default ;
-
-  template< unsigned TrivialScalarSize >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset
-    ( std::integral_constant<unsigned,TrivialScalarSize> const &
-    , Kokkos::LayoutLeft const & arg_layout
-    )
-    : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 )
-    {}
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
-    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
-           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
-    {
-      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
-      // Also requires equal static dimensions ...
-    } 
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
-    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
-    {
-      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
-                   , "ViewOffset LayoutLeft and LayoutRight are only compatible when rank == 1" );
-    }
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
-    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
-    {
-      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
-                   , "ViewOffset LayoutLeft and LayoutStride are only compatible when rank == 1" );
-      if ( rhs.m_stride.S0 != 1 ) {
-        Kokkos::abort("Kokkos::Experimental::ViewOffset assignment of LayoutLeft from LayoutStride  requires stride == 1" );
-      }
-    }
-
-  //----------------------------------------
-  // Subview construction
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset(
-    const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
-    const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
-    : m_dim( sub.range_extent(0), 0, 0, 0, 0, 0, 0, 0 )
-    {
-      static_assert( ( 0 == dimension_type::rank ) ||
-                     ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank )
-                   , "ViewOffset subview construction requires compatible rank" );
-    }
-};
-
-//----------------------------------------------------------------------------
-// LayoutLeft AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding
-template < class Dimension >
-struct ViewOffset< Dimension , Kokkos::LayoutLeft
-                 , typename std::enable_if<( 1 < Dimension::rank
-                                             &&
-                                             0 < Dimension::rank_dynamic
-                                           )>::type >
-{
-  using is_mapping_plugin = std::true_type ;
-  using is_regular        = std::true_type ;
-
-  typedef size_t             size_type ;
-  typedef Dimension          dimension_type ;
-  typedef Kokkos::LayoutLeft array_layout ;
-
-  dimension_type m_dim ;
-  size_type      m_stride ;
-
-  //----------------------------------------
-
-  // rank 1
-  template< typename I0 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 ) const { return i0 ; }
-
-  // rank 2
-  template < typename I0 , typename I1 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 , I1 const & i1 ) const
-    { return i0 + m_stride * i1 ; }
-
-  //rank 3
-  template < typename I0, typename I1, typename I2 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
-  {
-    return i0 + m_stride * ( i1 + m_dim.N1 * i2 );
-  }
-
-  //rank 4
-  template < typename I0, typename I1, typename I2, typename I3 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
-  {
-    return i0 + m_stride * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * i3 ));
-  }
-
-  //rank 5
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4 ) const
-  {
-    return i0 + m_stride * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * (
-           i3 + m_dim.N3 * i4 )));
-  }
-
-  //rank 6
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5 ) const
-  {
-    return i0 + m_stride * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * (
-           i3 + m_dim.N3 * (
-           i4 + m_dim.N4 * i5 ))));
-  }
-
-  //rank 7
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
-  {
-    return i0 + m_stride * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * (
-           i3 + m_dim.N3 * (
-           i4 + m_dim.N4 * (
-           i5 + m_dim.N5 * i6 )))));
-  }
-
-  //rank 8
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
-  {
-    return i0 + m_stride * (
-           i1 + m_dim.N1 * (
-           i2 + m_dim.N2 * (
-           i3 + m_dim.N3 * (
-           i4 + m_dim.N4 * (
-           i5 + m_dim.N5 * (
-           i6 + m_dim.N6 * i7 ))))));
-  }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr array_layout layout() const
-    {
-      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
-                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
-    }
-
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
-
-  /* Cardinality of the domain index space */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type size() const
-    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
-
-  /* Span of the range space */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type span() const
-    { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
-
-  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_stride == m_dim.N0 ; }
-
-  /* Strides of dimensions */
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride * m_dim.N1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride * m_dim.N1 * m_dim.N2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; }
-
-  // Stride with [ rank ] value is the total length
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    {
-      s[0] = 1 ;
-      if ( 0 < dimension_type::rank ) { s[1] = m_stride ; }
-      if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; }
-      if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; }
-      if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; }
-      if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; }
-      if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; }
-      if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; }
-      if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; }
-    }
-
-  //----------------------------------------
-
-private:
-
-  template< unsigned TrivialScalarSize >
-  struct Padding {
-    enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
-    enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
-
-    // If memory alignment is a multiple of the trivial scalar size then attempt to align.
-    enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 };
-    enum { div_ok = div ? div : 1 }; // To valid modulo zero in constexpr
-
-    KOKKOS_INLINE_FUNCTION
-    static constexpr size_t stride( size_t const N )
-      {
-        return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) )
-               ? N + align - ( N % div_ok ) : N ;
-      }
-  };
-
-public:
-
-  ViewOffset() = default ;
-  ViewOffset( const ViewOffset & ) = default ;
-  ViewOffset & operator = ( const ViewOffset & ) = default ;
-
-  /* Enable padding for trivial scalar types with non-zero trivial scalar size */
-  template< unsigned TrivialScalarSize >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset
-    ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
-    , Kokkos::LayoutLeft const & arg_layout
-    )
-    : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1]
-           , arg_layout.dimension[2] , arg_layout.dimension[3]
-           , arg_layout.dimension[4] , arg_layout.dimension[5]
-           , arg_layout.dimension[6] , arg_layout.dimension[7]
-           )
-    , m_stride( Padding<TrivialScalarSize>::stride( arg_layout.dimension[0] ) )
-    {}
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
-    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
-           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
-    , m_stride( rhs.stride_1() )
-    {
-      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
-      // Also requires equal static dimensions ...
-    } 
-
-  //----------------------------------------
-  // Subview construction
-  // This subview must be 2 == rank and 2 == rank_dynamic
-  // due to only having stride #0.
-  // The source dimension #0 must be non-zero for stride-one leading dimension.
-  // At most subsequent dimension can be non-zero.
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset
-    ( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
-      const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
-    : m_dim( sub.range_extent(0)
-           , sub.range_extent(1)
-           , 0, 0, 0, 0, 0, 0 )
-    , m_stride( ( 1 == sub.range_index(1) ? rhs.stride_1() :
-                ( 2 == sub.range_index(1) ? rhs.stride_2() :
-                ( 3 == sub.range_index(1) ? rhs.stride_3() :
-                ( 4 == sub.range_index(1) ? rhs.stride_4() :
-                ( 5 == sub.range_index(1) ? rhs.stride_5() :
-                ( 6 == sub.range_index(1) ? rhs.stride_6() :
-                ( 7 == sub.range_index(1) ? rhs.stride_7() : 0 ))))))))
-    {
-      static_assert( ( 2 == dimension_type::rank ) &&
-                     ( 2 == dimension_type::rank_dynamic ) &&
-                     ( 2 <= DimRHS::rank )
-                   , "ViewOffset subview construction requires compatible rank" );
-    }
-};
-
-//----------------------------------------------------------------------------
-// LayoutRight AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding
-template < class Dimension >
-struct ViewOffset< Dimension , Kokkos::LayoutRight
-                 , typename std::enable_if<( 1 >= Dimension::rank
-                                             ||
-                                             0 == Dimension::rank_dynamic
-                                           )>::type >
-{
-  using is_mapping_plugin = std::true_type ;
-  using is_regular        = std::true_type ;
-
-  typedef size_t              size_type ;
-  typedef Dimension           dimension_type ;
-  typedef Kokkos::LayoutRight array_layout ;
-
-  dimension_type m_dim ;
-
-  //----------------------------------------
-
-  // rank 1
-  template< typename I0 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 ) const { return i0 ; }
-
-  // rank 2
-  template < typename I0 , typename I1 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 , I1 const & i1 ) const
-    { return i1 + m_dim.N1 * i0 ; }
-
-  //rank 3
-  template < typename I0, typename I1, typename I2 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
-  {
-    return i2 + m_dim.N2 * ( i1 + m_dim.N1 * ( i0 ));
-  }
-
-  //rank 4
-  template < typename I0, typename I1, typename I2, typename I3 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
-  {
-    return i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * (
-           i1 + m_dim.N1 * ( i0 )));
-  }
-
-  //rank 5
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4 ) const
-  {
-    return i4 + m_dim.N4 * (
-           i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * (
-           i1 + m_dim.N1 * ( i0 ))));
-  }
-
-  //rank 6
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5 ) const
-  {
-    return i5 + m_dim.N5 * (
-           i4 + m_dim.N4 * (
-           i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * (
-           i1 + m_dim.N1 * ( i0 )))));
-  }
-
-  //rank 7
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
-  {
-    return i6 + m_dim.N6 * (
-           i5 + m_dim.N5 * (
-           i4 + m_dim.N4 * (
-           i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * (
-           i1 + m_dim.N1 * ( i0 ))))));
-  }
-
-  //rank 8
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
-  {
-    return i7 + m_dim.N7 * (
-           i6 + m_dim.N6 * (
-           i5 + m_dim.N5 * (
-           i4 + m_dim.N4 * (
-           i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * (
-           i1 + m_dim.N1 * ( i0 )))))));
-  }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr array_layout layout() const
-    {
-      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
-                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
-    }
-
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
-
-  /* Cardinality of the domain index space */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type size() const
-    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
-
-  /* Span of the range space */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type span() const
-    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
-
-  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; }
-
-  /* Strides of dimensions */
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; }
-
-  // Stride with [ rank ] value is the total length
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    {
-      size_type n = 1 ;
-      if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; }
-      if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; }
-      if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; }
-      if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; }
-      if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; }
-      if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; }
-      if ( 1 < dimension_type::rank ) { s[1] = n ; n *= m_dim.N1 ; }
-      if ( 0 < dimension_type::rank ) { s[0] = n ; }
-      s[dimension_type::rank] = n * m_dim.N0 ;
-    }
-
-  //----------------------------------------
-
-  ViewOffset() = default ;
-  ViewOffset( const ViewOffset & ) = default ;
-  ViewOffset & operator = ( const ViewOffset & ) = default ;
-
-  template< unsigned TrivialScalarSize >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset
-    ( std::integral_constant<unsigned,TrivialScalarSize> const &
-    , Kokkos::LayoutRight const & arg_layout
-    )
-    : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 )
-    {}
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
-    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
-           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
-    {
-      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
-      // Also requires equal static dimensions ...
-    } 
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
-    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
-    {
-      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
-                   , "ViewOffset LayoutRight and LayoutLeft are only compatible when rank == 1" );
-    }
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
-    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
-    {
-      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
-                   , "ViewOffset LayoutLeft/Right and LayoutStride are only compatible when rank == 1" );
-      if ( rhs.m_stride.S0 != 1 ) {
-        Kokkos::abort("Kokkos::Experimental::ViewOffset assignment of LayoutLeft/Right from LayoutStride  requires stride == 1" );
-      }
-    }
-
-  //----------------------------------------
-  // Subview construction
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset
-    ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
-    , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
-    )
-    : m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 )
-    {
-      static_assert( ( 0 == dimension_type::rank_dynamic ) ||
-                     ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank )
-                   , "ViewOffset subview construction requires compatible rank" );
-    }
-};
-
-//----------------------------------------------------------------------------
-// LayoutRight AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding
-template < class Dimension >
-struct ViewOffset< Dimension , Kokkos::LayoutRight
-                 , typename std::enable_if<( 1 < Dimension::rank
-                                             &&
-                                             0 < Dimension::rank_dynamic
-                                           )>::type >
-{
-  using is_mapping_plugin = std::true_type ;
-  using is_regular        = std::true_type ;
-
-  typedef size_t               size_type ;
-  typedef Dimension            dimension_type ;
-  typedef Kokkos::LayoutRight  array_layout ;
-
-  dimension_type m_dim ;
-  size_type      m_stride ;
-
-  //----------------------------------------
-
-  // rank 1
-  template< typename I0 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 ) const { return i0 ; }
-
-  // rank 2
-  template < typename I0 , typename I1 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 , I1 const & i1 ) const
-  { return i1 + i0 * m_stride ; }
-
-  //rank 3
-  template < typename I0, typename I1, typename I2 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
-  { return i2 + m_dim.N2 * ( i1 ) + i0 * m_stride ; }
-
-  //rank 4
-  template < typename I0, typename I1, typename I2, typename I3 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
-  {
-    return i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * ( i1 )) +
-           i0 * m_stride ;
-  }
-
-  //rank 5
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4 ) const
-  {
-    return i4 + m_dim.N4 * (
-           i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * ( i1 ))) +
-           i0 * m_stride ;
-  }
-
-  //rank 6
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5 ) const
-  {
-    return i5 + m_dim.N5 * (
-           i4 + m_dim.N4 * (
-           i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * ( i1 )))) +
-           i0 * m_stride ;
-  }
-
-  //rank 7
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
-  {
-    return i6 + m_dim.N6 * (
-           i5 + m_dim.N5 * (
-           i4 + m_dim.N4 * (
-           i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * ( i1 ))))) +
-           i0 * m_stride ;
-  }
-
-  //rank 8
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
-  {
-    return i7 + m_dim.N7 * (
-           i6 + m_dim.N6 * (
-           i5 + m_dim.N5 * (
-           i4 + m_dim.N4 * (
-           i3 + m_dim.N3 * (
-           i2 + m_dim.N2 * ( i1 )))))) +
-           i0 * m_stride ;
-  }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr array_layout layout() const
-    {
-      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
-                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
-    }
-
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
-
-  /* Cardinality of the domain index space */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type size() const
-    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
-
-  /* Span of the range space */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type span() const
-    { return m_dim.N0 * m_stride ; }
-
-  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
-    { return m_stride == m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; }
-
-  /* Strides of dimensions */
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride ; }
-
-  // Stride with [ rank ] value is the total length
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    {
-      size_type n = 1 ;
-      if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; }
-      if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; }
-      if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; }
-      if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; }
-      if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; }
-      if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; }
-      if ( 1 < dimension_type::rank ) { s[1] = n ; }
-      if ( 0 < dimension_type::rank ) { s[0] = m_stride ; }
-      s[dimension_type::rank] = m_stride * m_dim.N0 ;
-    }
-
-  //----------------------------------------
-
-private:
-
-  template< unsigned TrivialScalarSize >
-  struct Padding {
-    enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
-    enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
-
-    // If memory alignment is a multiple of the trivial scalar size then attempt to align.
-    enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 };
-    enum { div_ok = div ? div : 1 }; // To valid modulo zero in constexpr
-
-    KOKKOS_INLINE_FUNCTION
-    static constexpr size_t stride( size_t const N )
-    {
-      return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) )
-             ? N + align - ( N % div_ok ) : N ;
-    }
-  };
-
-public:
-
-  ViewOffset() = default ;
-  ViewOffset( const ViewOffset & ) = default ;
-  ViewOffset & operator = ( const ViewOffset & ) = default ;
-
-  /* Enable padding for trivial scalar types with non-zero trivial scalar size.  */
-  template< unsigned TrivialScalarSize >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset
-    ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
-    , Kokkos::LayoutRight const & arg_layout
-    )
-    : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1]
-           , arg_layout.dimension[2] , arg_layout.dimension[3]
-           , arg_layout.dimension[4] , arg_layout.dimension[5]
-           , arg_layout.dimension[6] , arg_layout.dimension[7]
-           )
-    , m_stride( Padding<TrivialScalarSize>::
-                  stride( /* 2 <= rank */
-                          m_dim.N1 * ( dimension_type::rank == 2 ? 1 :
-                          m_dim.N2 * ( dimension_type::rank == 3 ? 1 :
-                          m_dim.N3 * ( dimension_type::rank == 4 ? 1 :
-                          m_dim.N4 * ( dimension_type::rank == 5 ? 1 :
-                          m_dim.N5 * ( dimension_type::rank == 6 ? 1 :
-                          m_dim.N6 * ( dimension_type::rank == 7 ? 1 : m_dim.N7 )))))) ))
-    {}
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
-    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
-           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
-    , m_stride( rhs.stride_0() )
-    {
-      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
-      // Also requires equal static dimensions ...
-    } 
-
-  //----------------------------------------
-  // Subview construction
-  // Last dimension must be non-zero
-
-  template< class DimRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset
-    ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
-    , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
-    )
-    : m_dim( sub.range_extent(0)
-           , sub.range_extent(1)
-           , 0, 0, 0, 0, 0, 0 ) 
-    , m_stride( 0 == sub.range_index(0) ? rhs.stride_0() : (
-                1 == sub.range_index(0) ? rhs.stride_1() : (
-                2 == sub.range_index(0) ? rhs.stride_2() : (
-                3 == sub.range_index(0) ? rhs.stride_3() : (
-                4 == sub.range_index(0) ? rhs.stride_4() : (
-                5 == sub.range_index(0) ? rhs.stride_5() : (
-                6 == sub.range_index(0) ? rhs.stride_6() : 0 )))))))
-    {
-      // This subview must be 2 == rank and 2 == rank_dynamic
-      // due to only having stride #0.
-      // The source dimension #0 must be non-zero for stride-one leading dimension.
-      // At most subsequent dimension can be non-zero.
-
-      static_assert( ( 2 == dimension_type::rank ) &&
-                     ( 2 <= DimRHS::rank )
-                   , "ViewOffset subview construction requires compatible rank" );
-    }
-};
-
-//----------------------------------------------------------------------------
-/* Strided array layout only makes sense for 0 < rank */
-/* rank = 0 included for DynRankView case */
-
-template< unsigned Rank >
-struct ViewStride ;
-
-template<>
-struct ViewStride<0> {
-  enum { S0 = 0 , S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
-
-  ViewStride() = default ;
-  ViewStride( const ViewStride & ) = default ;
-  ViewStride & operator = ( const ViewStride & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewStride( size_t , size_t , size_t , size_t
-                      , size_t , size_t , size_t , size_t )
-    {}
-};
-
-template<>
-struct ViewStride<1> {
-  size_t S0 ;
-  enum { S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
-
-  ViewStride() = default ;
-  ViewStride( const ViewStride & ) = default ;
-  ViewStride & operator = ( const ViewStride & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewStride( size_t aS0 , size_t , size_t , size_t
-                      , size_t , size_t , size_t , size_t )
-    : S0( aS0 )
-    {}
-};
-
-template<>
-struct ViewStride<2> {
-  size_t S0 , S1 ;
-  enum { S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
-
-  ViewStride() = default ;
-  ViewStride( const ViewStride & ) = default ;
-  ViewStride & operator = ( const ViewStride & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t , size_t
-                      , size_t , size_t , size_t , size_t )
-    : S0( aS0 ) , S1( aS1 )
-    {}
-};
-
-template<>
-struct ViewStride<3> {
-  size_t S0 , S1 , S2 ;
-  enum { S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
-
-  ViewStride() = default ;
-  ViewStride( const ViewStride & ) = default ;
-  ViewStride & operator = ( const ViewStride & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t
-                      , size_t , size_t , size_t , size_t )
-    : S0( aS0 ) , S1( aS1 ) , S2( aS2 )
-    {}
-};
-
-template<>
-struct ViewStride<4> {
-  size_t S0 , S1 , S2 , S3 ;
-  enum { S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
-
-  ViewStride() = default ;
-  ViewStride( const ViewStride & ) = default ;
-  ViewStride & operator = ( const ViewStride & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
-                      , size_t , size_t , size_t , size_t )
-    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
-    {}
-};
-
-template<>
-struct ViewStride<5> {
-  size_t S0 , S1 , S2 , S3 , S4 ;
-  enum { S5 = 0 , S6 = 0 , S7 = 0 };
-
-  ViewStride() = default ;
-  ViewStride( const ViewStride & ) = default ;
-  ViewStride & operator = ( const ViewStride & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
-                      , size_t aS4 , size_t , size_t , size_t )
-    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
-    , S4( aS4 )
-    {}
-};
-
-template<>
-struct ViewStride<6> {
-  size_t S0 , S1 , S2 , S3 , S4 , S5 ;
-  enum { S6 = 0 , S7 = 0 };
-
-  ViewStride() = default ;
-  ViewStride( const ViewStride & ) = default ;
-  ViewStride & operator = ( const ViewStride & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
-                      , size_t aS4 , size_t aS5 , size_t , size_t )
-    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
-    , S4( aS4 ) , S5( aS5 )
-    {}
-};
-
-template<>
-struct ViewStride<7> {
-  size_t S0 , S1 , S2 , S3 , S4 , S5 , S6 ;
-  enum { S7 = 0 };
-
-  ViewStride() = default ;
-  ViewStride( const ViewStride & ) = default ;
-  ViewStride & operator = ( const ViewStride & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
-                      , size_t aS4 , size_t aS5 , size_t aS6 , size_t )
-    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
-    , S4( aS4 ) , S5( aS5 ) , S6( aS6 )
-    {}
-};
-
-template<>
-struct ViewStride<8> {
-  size_t S0 , S1 , S2 , S3 , S4 , S5 , S6 , S7 ;
-
-  ViewStride() = default ;
-  ViewStride( const ViewStride & ) = default ;
-  ViewStride & operator = ( const ViewStride & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
-                      , size_t aS4 , size_t aS5 , size_t aS6 , size_t aS7 )
-    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
-    , S4( aS4 ) , S5( aS5 ) , S6( aS6 ) , S7( aS7 )
-    {}
-};
-
-template < class Dimension >
-struct ViewOffset< Dimension , Kokkos::LayoutStride
-                 , void >
-{
-private:
-  typedef ViewStride< Dimension::rank >  stride_type ;
-public:
-
-  using is_mapping_plugin = std::true_type ;
-  using is_regular        = std::true_type ;
-
-  typedef size_t                size_type ;
-  typedef Dimension             dimension_type ;
-  typedef Kokkos::LayoutStride  array_layout ;
-
-  dimension_type  m_dim ;
-  stride_type     m_stride ;
-
-  //----------------------------------------
-
-  // rank 1
-  template< typename I0 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 ) const
-  {
-    return i0 * m_stride.S0 ;
-  }
-
-  // rank 2
-  template < typename I0 , typename I1 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0 , I1 const & i1 ) const
-  {
-    return i0 * m_stride.S0 +
-           i1 * m_stride.S1 ;
-  }
-
-  //rank 3
-  template < typename I0, typename I1, typename I2 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
-  {
-    return i0 * m_stride.S0 +
-           i1 * m_stride.S1 +
-           i2 * m_stride.S2 ;
-  }
-
-  //rank 4
-  template < typename I0, typename I1, typename I2, typename I3 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
-  {
-    return i0 * m_stride.S0 +
-           i1 * m_stride.S1 +
-           i2 * m_stride.S2 +
-           i3 * m_stride.S3 ;
-  }
-
-  //rank 5
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4 ) const
-  {
-    return i0 * m_stride.S0 +
-           i1 * m_stride.S1 +
-           i2 * m_stride.S2 +
-           i3 * m_stride.S3 +
-           i4 * m_stride.S4 ;
-  }
-
-  //rank 6
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5 ) const
-  {
-    return i0 * m_stride.S0 +
-           i1 * m_stride.S1 +
-           i2 * m_stride.S2 +
-           i3 * m_stride.S3 +
-           i4 * m_stride.S4 +
-           i5 * m_stride.S5 ;
-  }
-
-  //rank 7
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
-  {
-    return i0 * m_stride.S0 +
-           i1 * m_stride.S1 +
-           i2 * m_stride.S2 +
-           i3 * m_stride.S3 +
-           i4 * m_stride.S4 +
-           i5 * m_stride.S5 +
-           i6 * m_stride.S6 ;
-  }
-
-  //rank 8
-  template < typename I0, typename I1, typename I2, typename I3
-           , typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_INLINE_FUNCTION constexpr
-  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
-                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
-  {
-    return i0 * m_stride.S0 +
-           i1 * m_stride.S1 +
-           i2 * m_stride.S2 +
-           i3 * m_stride.S3 +
-           i4 * m_stride.S4 +
-           i5 * m_stride.S5 +
-           i6 * m_stride.S6 +
-           i7 * m_stride.S7 ;
-  }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr array_layout layout() const
-    {
-      return array_layout( m_dim.N0 , m_stride.S0
-                         , m_dim.N1 , m_stride.S1
-                         , m_dim.N2 , m_stride.S2
-                         , m_dim.N3 , m_stride.S3
-                         , m_dim.N4 , m_stride.S4
-                         , m_dim.N5 , m_stride.S5
-                         , m_dim.N6 , m_stride.S6
-                         , m_dim.N7 , m_stride.S7
-                         );
-    }
-
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
-
-  /* Cardinality of the domain index space */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type size() const
-    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
-
-private:
-
-  KOKKOS_INLINE_FUNCTION
-  static constexpr size_type Max( size_type lhs , size_type rhs )
-    { return lhs < rhs ? rhs : lhs ; }
-
-public:
-
-  /* Span of the range space, largest stride * dimension */
-  KOKKOS_INLINE_FUNCTION
-  constexpr size_type span() const
-    {
-      return Max( m_dim.N0 * m_stride.S0 ,
-             Max( m_dim.N1 * m_stride.S1 ,
-             Max( m_dim.N2 * m_stride.S2 ,
-             Max( m_dim.N3 * m_stride.S3 ,
-             Max( m_dim.N4 * m_stride.S4 ,
-             Max( m_dim.N5 * m_stride.S5 ,
-             Max( m_dim.N6 * m_stride.S6 ,
-                  m_dim.N7 * m_stride.S7 )))))));
-    }
-
-  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return span() == size(); }
-
-  /* Strides of dimensions */
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride.S0 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride.S1 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride.S2 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride.S3 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride.S4 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_stride.S5 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride.S6 ; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride.S7 ; }
-
-  // Stride with [ rank ] value is the total length
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    {
-      if ( 0 < dimension_type::rank ) { s[0] = m_stride.S0 ; }
-      if ( 1 < dimension_type::rank ) { s[1] = m_stride.S1 ; }
-      if ( 2 < dimension_type::rank ) { s[2] = m_stride.S2 ; }
-      if ( 3 < dimension_type::rank ) { s[3] = m_stride.S3 ; }
-      if ( 4 < dimension_type::rank ) { s[4] = m_stride.S4 ; }
-      if ( 5 < dimension_type::rank ) { s[5] = m_stride.S5 ; }
-      if ( 6 < dimension_type::rank ) { s[6] = m_stride.S6 ; }
-      if ( 7 < dimension_type::rank ) { s[7] = m_stride.S7 ; }
-      s[dimension_type::rank] = span();
-    }
-
-  //----------------------------------------
-
-  ViewOffset() = default ;
-  ViewOffset( const ViewOffset & ) = default ;
-  ViewOffset & operator = ( const ViewOffset & ) = default ;
-
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset( std::integral_constant<unsigned,0> const &
-                      , Kokkos::LayoutStride const & rhs )
-    : m_dim( rhs.dimension[0] , rhs.dimension[1] , rhs.dimension[2] , rhs.dimension[3]
-           , rhs.dimension[4] , rhs.dimension[5] , rhs.dimension[6] , rhs.dimension[7] )
-    , m_stride( rhs.stride[0] , rhs.stride[1] , rhs.stride[2] , rhs.stride[3]
-              , rhs.stride[4] , rhs.stride[5] , rhs.stride[6] , rhs.stride[7] )
-    {}
-
-  template< class DimRHS , class LayoutRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs )
-    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
-           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
-    , m_stride( rhs.stride_0() , rhs.stride_1() , rhs.stride_2() , rhs.stride_3()
-              , rhs.stride_4() , rhs.stride_5() , rhs.stride_6() , rhs.stride_7() )
-    {
-      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
-      // Also requires equal static dimensions ...
-    }
-
-  //----------------------------------------
-  // Subview construction
-
-private:
-
-  template< class DimRHS , class LayoutRHS >
-  KOKKOS_INLINE_FUNCTION static
-  constexpr size_t stride
-    ( unsigned r , const ViewOffset< DimRHS , LayoutRHS , void > & rhs )
-    {
-      return r >  7 ? 0 : (
-             r == 0 ? rhs.stride_0() : (
-             r == 1 ? rhs.stride_1() : (
-             r == 2 ? rhs.stride_2() : (
-             r == 3 ? rhs.stride_3() : (
-             r == 4 ? rhs.stride_4() : (
-             r == 5 ? rhs.stride_5() : (
-             r == 6 ? rhs.stride_6() : rhs.stride_7() )))))));
-    }
-
-public:
-
-  template< class DimRHS , class LayoutRHS >
-  KOKKOS_INLINE_FUNCTION
-  constexpr ViewOffset
-    ( const ViewOffset< DimRHS , LayoutRHS , void > & rhs
-    , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
-    )
-    // range_extent(r) returns 0 when dimension_type::rank <= r
-    : m_dim( sub.range_extent(0)
-           , sub.range_extent(1)
-           , sub.range_extent(2)
-           , sub.range_extent(3)
-           , sub.range_extent(4)
-           , sub.range_extent(5)
-           , sub.range_extent(6)
-           , sub.range_extent(7)
-           )
-    // range_index(r) returns ~0u when dimension_type::rank <= r
-    , m_stride( stride( sub.range_index(0), rhs )
-              , stride( sub.range_index(1), rhs )
-              , stride( sub.range_index(2), rhs )
-              , stride( sub.range_index(3), rhs )
-              , stride( sub.range_index(4), rhs )
-              , stride( sub.range_index(5), rhs )
-              , stride( sub.range_index(6), rhs )
-              , stride( sub.range_index(7), rhs )
-              )
-    {}
-};
-
-}}} // namespace Kokkos::Experimental::Impl
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-/** \brief  ViewDataHandle provides the type of the 'data handle' which the view
- *          uses to access data with the [] operator. It also provides
- *          an allocate function and a function to extract a raw ptr from the
- *          data handle. ViewDataHandle also defines an enum ReferenceAble which
- *          specifies whether references/pointers to elements can be taken and a
- *          'return_type' which is what the view operators will give back.
- *          Specialisation of this object allows three things depending
- *          on ViewTraits and compiler options:
- *          (i)   Use special allocator (e.g. huge pages/small pages and pinned memory)
- *          (ii)  Use special data handle type (e.g. add Cuda Texture Object)
- *          (iii) Use special access intrinsics (e.g. texture fetch and non-caching loads)
- */
-template< class Traits , class Enable = void >
-struct ViewDataHandle {
-
-  typedef typename Traits::value_type   value_type  ;
-  typedef typename Traits::value_type * handle_type ;
-  typedef typename Traits::value_type & return_type ;
-  typedef Kokkos::Experimental::Impl::SharedAllocationTracker  track_type  ;
-
-  KOKKOS_INLINE_FUNCTION
-  static handle_type assign( value_type * arg_data_ptr
-                           , track_type const & /*arg_tracker*/ )
-  {
-    return handle_type( arg_data_ptr );
-  }
-};
-
-template< class Traits >
-struct ViewDataHandle< Traits ,
-  typename std::enable_if<( std::is_same< typename Traits::non_const_value_type
-                                        , typename Traits::value_type >::value
-                            &&
-                            std::is_same< typename Traits::specialize , void >::value
-                            &&
-                            Traits::memory_traits::Atomic
-                          )>::type >
-{
-  typedef typename Traits::value_type  value_type ;
-  typedef typename Kokkos::Impl::AtomicViewDataHandle< Traits >  handle_type ;
-  typedef typename Kokkos::Impl::AtomicDataElement< Traits >     return_type ;
-  typedef Kokkos::Experimental::Impl::SharedAllocationTracker    track_type  ;
-
-  KOKKOS_INLINE_FUNCTION
-  static handle_type assign( value_type * arg_data_ptr
-                           , track_type const & /*arg_tracker*/ )
-  {
-    return handle_type( arg_data_ptr );
-  }
-};
-
-}}} // namespace Kokkos::Experimental::Impl
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-//----------------------------------------------------------------------------
-
-/*
- *  The construction, assignment to default, and destruction
- *  are merged into a single functor.
- *  Primarily to work around an unresolved CUDA back-end bug
- *  that would lose the destruction cuda device function when
- *  called from the shared memory tracking destruction.
- *  Secondarily to have two fewer partial specializations.
- */
-template< class ExecSpace
-        , class ValueType
-        , bool IsScalar = std::is_scalar< ValueType >::value
-        >
-struct ViewValueFunctor ;
-
-template< class ExecSpace , class ValueType >
-struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ >
-{
-  typedef Kokkos::RangePolicy< ExecSpace > PolicyType ;
-
-  ExecSpace   space ;
-  ValueType * ptr ;
-  size_t      n ;
-  bool        destroy ;
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const size_t i ) const
-    {
-      if ( destroy ) { (ptr+i)->~ValueType(); }
-      else           { new (ptr+i) ValueType(); }
-    }
-
-  ViewValueFunctor() = default ;
-  ViewValueFunctor( const ViewValueFunctor & ) = default ;
-  ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ;
-
-  ViewValueFunctor( ExecSpace   const & arg_space
-                  , ValueType * const arg_ptr
-                  , size_t      const arg_n )
-    : space( arg_space )
-    , ptr( arg_ptr )
-    , n( arg_n )
-    , destroy( false )
-    {}
-
-  void execute( bool arg )
-    {
-      destroy = arg ;
-      if ( ! space.in_parallel() ) {
-        const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
-          closure( *this , PolicyType( 0 , n ) );
-        closure.execute();
-        space.fence();
-      }
-      else {
-        for ( size_t i = 0 ; i < n ; ++i ) operator()(i);
-      }
-    }
-
-  void construct_shared_allocation()
-    { execute( false ); }
-
-  void destroy_shared_allocation()
-    { execute( true ); }
-};
-
-
-template< class ExecSpace , class ValueType >
-struct ViewValueFunctor< ExecSpace , ValueType , true /* is_scalar */ >
-{
-  typedef Kokkos::RangePolicy< ExecSpace > PolicyType ;
-
-  ExecSpace   space ;
-  ValueType * ptr ;
-  size_t      n ;
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const size_t i ) const
-    { ptr[i] = ValueType(); }
-
-  ViewValueFunctor() = default ;
-  ViewValueFunctor( const ViewValueFunctor & ) = default ;
-  ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ;
-
-  ViewValueFunctor( ExecSpace   const & arg_space
-                  , ValueType * const arg_ptr
-                  , size_t      const arg_n )
-    : space( arg_space )
-    , ptr( arg_ptr )
-    , n( arg_n )
-    {}
-
-  void construct_shared_allocation()
-    {
-      if ( ! space.in_parallel() ) {
-        const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
-          closure( *this , PolicyType( 0 , n ) );
-        closure.execute();
-        space.fence();
-      }
-      else {
-        for ( size_t i = 0 ; i < n ; ++i ) operator()(i);
-      }
-    }
-
-  void destroy_shared_allocation() {}
-};
-
-//----------------------------------------------------------------------------
-/** \brief  View mapping for non-specialized data type and standard layout */
-template< class Traits >
-class ViewMapping< Traits ,
-  typename std::enable_if<(
-    std::is_same< typename Traits::specialize , void >::value
-    &&
-    ViewOffset< typename Traits::dimension
-              , typename Traits::array_layout
-              , void >::is_mapping_plugin::value
-  )>::type >
-{
-private:
-
-  template< class , class ... > friend class ViewMapping ;
-  template< class , class ... > friend class Kokkos::Experimental::View ;
-
-  typedef ViewOffset< typename Traits::dimension
-                    , typename Traits::array_layout
-                    , void
-                    >  offset_type ;
-
-  typedef typename ViewDataHandle< Traits >::handle_type  handle_type ;
-
-  handle_type  m_handle ;
-  offset_type  m_offset ;
-
-  KOKKOS_INLINE_FUNCTION
-  ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
-    : m_handle( arg_handle )
-    , m_offset( arg_offset )
-    {}
-
-public:
-
-  //----------------------------------------
-  // Domain dimensions
-
-  enum { Rank = Traits::dimension::rank };
-
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const
-    { return m_offset.m_dim.extent(r); }
-
-  KOKKOS_INLINE_FUNCTION constexpr
-  typename Traits::array_layout layout() const
-    { return m_offset.layout(); }
-
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
-
-  // Is a regular layout with uniform striding for each index.
-  using is_regular = typename offset_type::is_regular ;
-
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
-
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_offset.stride(s); }
-
-  //----------------------------------------
-  // Range span
-
-  /** \brief  Span of the mapped range */
-  KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); }
-
-  /** \brief  Is the mapped range span contiguous */
-  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); }
-
-  typedef typename ViewDataHandle< Traits >::return_type  reference_type ;
-  typedef typename Traits::value_type *                   pointer_type ;
-
-  /** \brief  If data references are lvalue_reference than can query pointer to memory */
-  KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
-    {
-      return std::is_lvalue_reference< reference_type >::value
-             ? (pointer_type) m_handle
-             : (pointer_type) 0 ;
-    }
-
-  //----------------------------------------
-  // The View class performs all rank and bounds checking before
-  // calling these element reference methods.
-
-  KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference() const { return m_handle[0]; }
-
-  template< typename I0 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename
-    std::enable_if< std::is_integral<I0>::value &&
-                    ! std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
-                  , reference_type >::type
-  reference( const I0 & i0 ) const { return m_handle[i0]; }
-
-  template< typename I0 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename
-    std::enable_if< std::is_integral<I0>::value &&
-                    std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
-                  , reference_type >::type
-  reference( const I0 & i0 ) const { return m_handle[ m_offset(i0) ]; }
-
-  template< typename I0 , typename I1 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference( const I0 & i0 , const I1 & i1 ) const
-    { return m_handle[ m_offset(i0,i1) ]; }
-
-  template< typename I0 , typename I1 , typename I2 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
-    { return m_handle[ m_offset(i0,i1,i2) ]; }
-
-  template< typename I0 , typename I1 , typename I2 , typename I3 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3) ]; }
-
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-                          , const I4 & i4 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3,i4) ]; }
-
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 , typename I5 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-                          , const I4 & i4 , const I5 & i5 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5) ]; }
-
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 , typename I5 , typename I6 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-                          , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6) ]; }
-
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 , typename I5 , typename I6 , typename I7 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-                          , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; }
-
-  //----------------------------------------
-
-private:
-
-  enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ };
-  enum { MemorySpanSize = sizeof(typename Traits::value_type) };
-
-public:
-
-  /** \brief  Span, in bytes, of the referenced memory */
-  KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
-    {
-      return ( m_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask);
-    }
-
-  //----------------------------------------
-
-  KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
-  KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset() {}
-  KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
-    : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
-  KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
-    { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
-
-  KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs )
-    : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
-  KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
-    { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
-
-  //----------------------------------------
-
-  /**\brief  Span, in bytes, of the required memory */
-  KOKKOS_INLINE_FUNCTION
-  static constexpr size_t memory_span( typename Traits::array_layout const & arg_layout )
-    {
-      typedef std::integral_constant< unsigned , 0 >  padding ;
-      return ( offset_type( padding(), arg_layout ).span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
-    }
-
-  /**\brief  Wrap a span of memory */
-  template< class ... P >
-  KOKKOS_INLINE_FUNCTION
-  ViewMapping( ViewCtorProp< P ... > const & arg_prop
-             , typename Traits::array_layout const & arg_layout
-             )
-    : m_handle( ( (ViewCtorProp<void,pointer_type> const &) arg_prop ).value )
-    , m_offset( std::integral_constant< unsigned , 0 >() , arg_layout )
-    {}
-
-  //----------------------------------------
-  /*  Allocate and construct mapped array.
-   *  Allocate via shared allocation record and
-   *  return that record for allocation tracking.
-   */
-  template< class ... P >
-  SharedAllocationRecord<> *
-  allocate_shared( ViewCtorProp< P... > const & arg_prop
-                 , typename Traits::array_layout const & arg_layout )
-  {
-    typedef ViewCtorProp< P... > alloc_prop ;
-
-    typedef typename alloc_prop::execution_space  execution_space ;
-    typedef typename Traits::memory_space         memory_space ;
-    typedef typename Traits::value_type           value_type ;
-    typedef ViewValueFunctor< execution_space , value_type > functor_type ;
-    typedef SharedAllocationRecord< memory_space , functor_type > record_type ;
-
-    // Query the mapping for byte-size of allocation.
-    // If padding is allowed then pass in sizeof value type
-    // for padding computation.
-    typedef std::integral_constant
-      < unsigned
-      , alloc_prop::allow_padding ? sizeof(value_type) : 0
-      > padding ;
-
-    m_offset = offset_type( padding(), arg_layout );
-
-    const size_t alloc_size =
-      ( m_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
-
-    // Create shared memory tracking record with allocate memory from the memory space
-    record_type * const record =
-      record_type::allocate( ( (ViewCtorProp<void,memory_space> const &) arg_prop ).value
-                           , ( (ViewCtorProp<void,std::string>  const &) arg_prop ).value
-                           , alloc_size );
-
-    //  Only set the the pointer and initialize if the allocation is non-zero.
-    //  May be zero if one of the dimensions is zero.
-    if ( alloc_size ) {
-
-      m_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) );
-
-      if ( alloc_prop::initialize ) {
-        // Assume destruction is only required when construction is requested.
-        // The ViewValueFunctor has both value construction and destruction operators.
-        record->m_destroy = functor_type( ( (ViewCtorProp<void,execution_space> const &) arg_prop).value
-                                        , (value_type *) m_handle
-                                        , m_offset.span()
-                                        );
-
-        // Construct values
-        record->m_destroy.construct_shared_allocation();
-      }
-    }
-
-    return record ;
-  }
-};
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-/** \brief  Assign compatible default mappings */
-
-template< class DstTraits , class SrcTraits >
-class ViewMapping< DstTraits , SrcTraits ,
-  typename std::enable_if<(
-    std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
-    &&
-    std::is_same< typename DstTraits::specialize , void >::value
-    &&
-    std::is_same< typename SrcTraits::specialize , void >::value
-    &&
-    (
-      std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value
-      ||
-      (
-        (
-          std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
-          std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
-          std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
-        )
-        &&
-        (
-          std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
-          std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
-          std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
-        )
-      )
-    )
-  )>::type >
-{
-private:
-
-  enum { is_assignable_value_type =
-    std::is_same< typename DstTraits::value_type
-                , typename SrcTraits::value_type >::value ||
-    std::is_same< typename DstTraits::value_type
-                , typename SrcTraits::const_value_type >::value };
-
-  enum { is_assignable_dimension =
-    ViewDimensionAssignable< typename DstTraits::dimension
-                           , typename SrcTraits::dimension >::value };
-
-  enum { is_assignable_layout =
-    std::is_same< typename DstTraits::array_layout
-                , typename SrcTraits::array_layout >::value ||
-    std::is_same< typename DstTraits::array_layout
-                , Kokkos::LayoutStride >::value ||
-    ( DstTraits::dimension::rank == 0 ) ||
-    ( DstTraits::dimension::rank == 1 &&
-      DstTraits::dimension::rank_dynamic == 1 )
-    };
-
-public:
-
-  enum { is_assignable = is_assignable_value_type &&
-                         is_assignable_dimension &&
-                         is_assignable_layout };
-
-  typedef Kokkos::Experimental::Impl::SharedAllocationTracker  TrackType ;
-  typedef ViewMapping< DstTraits , void >  DstType ;
-  typedef ViewMapping< SrcTraits , void >  SrcType ;
-
-  KOKKOS_INLINE_FUNCTION
-  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
-    {
-      static_assert( is_assignable_value_type
-                   , "View assignment must have same value type or const = non-const" );
-
-      static_assert( is_assignable_dimension
-                   , "View assignment must have compatible dimensions" );
-
-      static_assert( is_assignable_layout
-                   , "View assignment must have compatible layout or have rank <= 1" );
-
-      typedef typename DstType::offset_type  dst_offset_type ;
-
-      if ( size_t(DstTraits::dimension::rank_dynamic) < size_t(SrcTraits::dimension::rank_dynamic) ) {
-        typedef typename DstTraits::dimension dst_dim;
-        bool assignable =
-          ( ( 1 > DstTraits::dimension::rank_dynamic && 1 <= SrcTraits::dimension::rank_dynamic ) ?
-            dst_dim::ArgN0 == src.dimension_0() : true ) &&
-          ( ( 2 > DstTraits::dimension::rank_dynamic && 2 <= SrcTraits::dimension::rank_dynamic ) ?
-            dst_dim::ArgN1 == src.dimension_1() : true ) &&
-          ( ( 3 > DstTraits::dimension::rank_dynamic && 3 <= SrcTraits::dimension::rank_dynamic ) ?
-            dst_dim::ArgN2 == src.dimension_2() : true ) &&
-          ( ( 4 > DstTraits::dimension::rank_dynamic && 4 <= SrcTraits::dimension::rank_dynamic ) ?
-            dst_dim::ArgN3 == src.dimension_3() : true ) &&
-          ( ( 5 > DstTraits::dimension::rank_dynamic && 5 <= SrcTraits::dimension::rank_dynamic ) ?
-            dst_dim::ArgN4 == src.dimension_4() : true ) &&
-          ( ( 6 > DstTraits::dimension::rank_dynamic && 6 <= SrcTraits::dimension::rank_dynamic ) ?
-            dst_dim::ArgN5 == src.dimension_5() : true ) &&
-          ( ( 7 > DstTraits::dimension::rank_dynamic && 7 <= SrcTraits::dimension::rank_dynamic ) ?
-            dst_dim::ArgN6 == src.dimension_6() : true ) &&
-          ( ( 8 > DstTraits::dimension::rank_dynamic && 8 <= SrcTraits::dimension::rank_dynamic ) ?
-            dst_dim::ArgN7 == src.dimension_7() : true )
-          ;
-        if(!assignable)
-          Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension.");
-      }
-      dst.m_offset = dst_offset_type( src.m_offset );
-      dst.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track );
-    }
-};
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-// Subview mapping.
-// Deduce destination view type from source view traits and subview arguments
-
-template< class SrcTraits , class ... Args >
-struct ViewMapping
-  < typename std::enable_if<(
-      std::is_same< typename SrcTraits::specialize , void >::value
-      &&
-      (
-        std::is_same< typename SrcTraits::array_layout
-                    , Kokkos::LayoutLeft >::value ||
-        std::is_same< typename SrcTraits::array_layout
-                    , Kokkos::LayoutRight >::value ||
-        std::is_same< typename SrcTraits::array_layout
-                    , Kokkos::LayoutStride >::value
-      )
-    )>::type
-  , SrcTraits
-  , Args ... >
-{
-private:
-
-  static_assert( SrcTraits::rank == sizeof...(Args) ,
-    "Subview mapping requires one argument for each dimension of source View" );
-
-  enum
-    { RZ = false
-    , R0 = bool(is_integral_extent<0,Args...>::value)
-    , R1 = bool(is_integral_extent<1,Args...>::value)
-    , R2 = bool(is_integral_extent<2,Args...>::value)
-    , R3 = bool(is_integral_extent<3,Args...>::value)
-    , R4 = bool(is_integral_extent<4,Args...>::value)
-    , R5 = bool(is_integral_extent<5,Args...>::value)
-    , R6 = bool(is_integral_extent<6,Args...>::value)
-    , R7 = bool(is_integral_extent<7,Args...>::value)
-    };
-
-  enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
-              + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
-
-  // Whether right-most rank is a range.
-  enum { R0_rev = ( 0 == SrcTraits::rank ? RZ : (
-                    1 == SrcTraits::rank ? R0 : (
-                    2 == SrcTraits::rank ? R1 : (
-                    3 == SrcTraits::rank ? R2 : (
-                    4 == SrcTraits::rank ? R3 : (
-                    5 == SrcTraits::rank ? R4 : (
-                    6 == SrcTraits::rank ? R5 : (
-                    7 == SrcTraits::rank ? R6 : R7 )))))))) };
-
-  // Subview's layout
-  typedef typename std::conditional<
-      ( /* Same array layout IF */
-        ( rank == 0 ) /* output rank zero */
-        ||
-        // OutputRank 1 or 2, InputLayout Left, Interval 0
-        // because single stride one or second index has a stride.
-        ( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ) //replace with input rank
-        ||
-        // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
-        // because single stride one or second index has a stride.
-        ( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ) //replace input rank
-      ), typename SrcTraits::array_layout , Kokkos::LayoutStride
-      >::type array_layout ;
-
-  typedef typename SrcTraits::value_type  value_type ;
-
-  typedef typename std::conditional< rank == 0 , value_type ,
-          typename std::conditional< rank == 1 , value_type * ,
-          typename std::conditional< rank == 2 , value_type ** ,
-          typename std::conditional< rank == 3 , value_type *** ,
-          typename std::conditional< rank == 4 , value_type **** ,
-          typename std::conditional< rank == 5 , value_type ***** ,
-          typename std::conditional< rank == 6 , value_type ****** ,
-          typename std::conditional< rank == 7 , value_type ******* ,
-                                                 value_type ********
-          >::type >::type >::type >::type >::type >::type >::type >::type
-     data_type ;
-
-public:
-
-  typedef Kokkos::Experimental::ViewTraits
-    < data_type
-    , array_layout 
-    , typename SrcTraits::device_type
-    , typename SrcTraits::memory_traits > traits_type ;
-
-  typedef Kokkos::Experimental::View
-    < data_type
-    , array_layout 
-    , typename SrcTraits::device_type
-    , typename SrcTraits::memory_traits > type ;
-
-  template< class MemoryTraits >
-  struct apply {
-
-    static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" );
-
-    typedef Kokkos::Experimental::ViewTraits
-      < data_type 
-      , array_layout
-      , typename SrcTraits::device_type
-      , MemoryTraits > traits_type ;
-
-    typedef Kokkos::Experimental::View
-      < data_type 
-      , array_layout
-      , typename SrcTraits::device_type
-      , MemoryTraits > type ;
-  };
-
-  // The presumed type is 'ViewMapping< traits_type , void >'
-  // However, a compatible ViewMapping is acceptable.
-  template< class DstTraits >
-  KOKKOS_INLINE_FUNCTION
-  static void assign( ViewMapping< DstTraits , void > & dst
-                    , ViewMapping< SrcTraits , void > const & src
-                    , Args ... args )
-    {
-      static_assert(
-        ViewMapping< DstTraits , traits_type , void >::is_assignable ,
-        "Subview destination type must be compatible with subview derived type" );
-
-      typedef ViewMapping< DstTraits , void >  DstType ;
-
-      typedef typename DstType::offset_type  dst_offset_type ;
-      typedef typename DstType::handle_type  dst_handle_type ;
-
-      const SubviewExtents< SrcTraits::rank , rank >
-        extents( src.m_offset.m_dim , args... );
-
-      dst.m_offset = dst_offset_type( src.m_offset , extents );
-      dst.m_handle = dst_handle_type( src.m_handle +
-                                      src.m_offset( extents.domain_offset(0)
-                                                  , extents.domain_offset(1)
-                                                  , extents.domain_offset(2)
-                                                  , extents.domain_offset(3)
-                                                  , extents.domain_offset(4)
-                                                  , extents.domain_offset(5)
-                                                  , extents.domain_offset(6)
-                                                  , extents.domain_offset(7)
-                                                  ) );
-    }
-};
-
-
-
-//----------------------------------------------------------------------------
-
-}}} // namespace Kokkos::Experimental::Impl
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-template< unsigned , class MapType >
-KOKKOS_INLINE_FUNCTION
-bool view_verify_operator_bounds( const MapType & )
-{ return true ; }
-
-template< unsigned R , class MapType , class iType , class ... Args >
-KOKKOS_INLINE_FUNCTION
-bool view_verify_operator_bounds
-  ( const MapType & map
-  , const iType   & i
-  , Args ... args
-  )
-{
-  return ( size_t(i) < map.extent(R) )
-         && view_verify_operator_bounds<R+1>( map , args ... );
-}
-
-template< unsigned , class MapType >
-inline
-void view_error_operator_bounds( char * , int , const MapType & )
-{}
-
-template< unsigned R , class MapType , class iType , class ... Args >
-inline
-void view_error_operator_bounds
-  ( char * buf
-  , int len
-  , const MapType & map
-  , const iType   & i
-  , Args ... args
-  )
-{
-  const int n =
-    snprintf(buf,len," %ld < %ld %c"
-            , static_cast<unsigned long>(i)
-            , static_cast<unsigned long>( map.extent(R) )
-            , ( sizeof...(Args) ? ',' : ')' )
-            );
-  view_error_operator_bounds<R+1>(buf+n,len-n,map,args...);
-}
-
-template< class MapType , class ... Args >
-KOKKOS_INLINE_FUNCTION
-void view_verify_operator_bounds
-  ( const MapType & map , Args ... args )
-{
-  if ( ! view_verify_operator_bounds<0>( map , args ... ) ) {
-#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST )
-    enum { LEN = 1024 };
-    char buffer[ LEN ];
-    int n = snprintf(buf,LEN,"View bounds error(" );
-    view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... );
-    Kokkos::Impl::throw_runtime_exception(std::string(buffer));
-#else
-    Kokkos::abort("View bounds error");
-#endif
-  }
-}
-
-
-class Error_view_scalar_reference_to_non_scalar_view ;
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP */
+// Deprecated file for backward compatibility
 
+#include <impl/Kokkos_ViewMapping.hpp>
diff --git a/lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp b/lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp
deleted file mode 100644
index 2de9df008ee5b42b5d38727ead56bae768869c43..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_ANALYZESHAPE_HPP
-#define KOKKOS_ANALYZESHAPE_HPP
-
-#include <impl/Kokkos_Shape.hpp>
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-//----------------------------------------------------------------------------
-
-/** \brief  Analyze the array shape defined by a Kokkos::View data type.
- *
- *  It is presumed that the data type can be mapped down to a multidimensional
- *  array of an intrinsic scalar numerical type (double, float, int, ... ).
- *  The 'value_type' of an array may be an embedded aggregate type such
- *  as a fixed length array 'Array<T,N>'.
- *  In this case the 'array_intrinsic_type' represents the
- *  underlying array of intrinsic scalar numerical type.
- *
- *  The embedded aggregate type must have an AnalyzeShape specialization
- *  to map it down to a shape and intrinsic scalar numerical type.
- */
-template< class T >
-struct AnalyzeShape : public Shape< sizeof(T) , 0 >
-{
-  typedef void specialize ;
-
-  typedef Shape< sizeof(T), 0 >  shape ;
-
-  typedef       T  array_intrinsic_type ;
-  typedef       T  value_type ;
-  typedef       T  type ;
-
-  typedef const T  const_array_intrinsic_type ;
-  typedef const T  const_value_type ;
-  typedef const T  const_type ;
-
-  typedef       T  non_const_array_intrinsic_type ;
-  typedef       T  non_const_value_type ;
-  typedef       T  non_const_type ;
-};
-
-template<>
-struct AnalyzeShape<void> : public Shape< 0 , 0 >
-{
-  typedef void specialize ;
-
-  typedef Shape< 0 , 0 >  shape ;
-
-  typedef       void  array_intrinsic_type ;
-  typedef       void  value_type ;
-  typedef       void  type ;
-  typedef const void  const_array_intrinsic_type ;
-  typedef const void  const_value_type ;
-  typedef const void  const_type ;
-  typedef       void  non_const_array_intrinsic_type ;
-  typedef       void  non_const_value_type ;
-  typedef       void  non_const_type ;
-};
-
-template< class T >
-struct AnalyzeShape< const T > : public AnalyzeShape<T>::shape
-{
-private:
-  typedef AnalyzeShape<T> nested ;
-public:
-
-  typedef typename nested::specialize specialize ;
-
-  typedef typename nested::shape shape ;
-
-  typedef typename nested::const_array_intrinsic_type  array_intrinsic_type ;
-  typedef typename nested::const_value_type            value_type ;
-  typedef typename nested::const_type                  type ;
-
-  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type ;
-  typedef typename nested::const_value_type            const_value_type ;
-  typedef typename nested::const_type                  const_type ;
-
-  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type ;
-  typedef typename nested::non_const_value_type            non_const_value_type ;
-  typedef typename nested::non_const_type                  non_const_type ;
-};
-
-template< class T >
-struct AnalyzeShape< T * >
-  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
-{
-private:
-  typedef AnalyzeShape<T> nested ;
-public:
-
-  typedef typename nested::specialize specialize ;
-
-  typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
-
-  typedef typename nested::array_intrinsic_type * array_intrinsic_type ;
-  typedef typename nested::value_type             value_type ;
-  typedef typename nested::type                 * type ;
-
-  typedef typename nested::const_array_intrinsic_type * const_array_intrinsic_type ;
-  typedef typename nested::const_value_type             const_value_type ;
-  typedef typename nested::const_type                 * const_type ;
-
-  typedef typename nested::non_const_array_intrinsic_type * non_const_array_intrinsic_type ;
-  typedef typename nested::non_const_value_type             non_const_value_type ;
-  typedef typename nested::non_const_type                 * non_const_type ;
-};
-
-template< class T >
-struct AnalyzeShape< T[] >
-  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
-{
-private:
-  typedef AnalyzeShape<T> nested ;
-public:
-
-  typedef typename nested::specialize specialize ;
-
-  typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
-
-  typedef typename nested::array_intrinsic_type  array_intrinsic_type [] ;
-  typedef typename nested::value_type            value_type ;
-  typedef typename nested::type                  type [] ;
-
-  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type [] ;
-  typedef typename nested::const_value_type            const_value_type ;
-  typedef typename nested::const_type                  const_type [] ;
-
-  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [] ;
-  typedef typename nested::non_const_value_type            non_const_value_type ;
-  typedef typename nested::non_const_type                  non_const_type [] ;
-};
-
-template< class T >
-struct AnalyzeShape< const T[] >
-  : public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type
-{
-private:
-  typedef AnalyzeShape< const T > nested ;
-public:
-
-  typedef typename nested::specialize specialize ;
-
-  typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
-
-  typedef typename nested::array_intrinsic_type  array_intrinsic_type [] ;
-  typedef typename nested::value_type            value_type ;
-  typedef typename nested::type                  type [] ;
-
-  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type [] ;
-  typedef typename nested::const_value_type            const_value_type ;
-  typedef typename nested::const_type                  const_type [] ;
-
-  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [] ;
-  typedef typename nested::non_const_value_type            non_const_value_type ;
-  typedef typename nested::non_const_type                  non_const_type [] ;
-};
-
-template< class T , unsigned N >
-struct AnalyzeShape< T[N] >
-  : public ShapeInsert< typename AnalyzeShape<T>::shape , N >::type
-{
-private:
-  typedef AnalyzeShape<T> nested ;
-public:
-
-  typedef typename nested::specialize specialize ;
-
-  typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
-
-  typedef typename nested::array_intrinsic_type  array_intrinsic_type [N] ;
-  typedef typename nested::value_type            value_type ;
-  typedef typename nested::type                  type [N] ;
-
-  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type [N] ;
-  typedef typename nested::const_value_type            const_value_type ;
-  typedef typename nested::const_type                  const_type [N] ;
-
-  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [N] ;
-  typedef typename nested::non_const_value_type            non_const_value_type ;
-  typedef typename nested::non_const_type                  non_const_type [N] ;
-};
-
-template< class T , unsigned N >
-struct AnalyzeShape< const T[N] >
-  : public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type
-{
-private:
-  typedef AnalyzeShape< const T > nested ;
-public:
-
-  typedef typename nested::specialize specialize ;
-
-  typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
-
-  typedef typename nested::array_intrinsic_type  array_intrinsic_type [N] ;
-  typedef typename nested::value_type            value_type ;
-  typedef typename nested::type                  type [N] ;
-
-  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type [N] ;
-  typedef typename nested::const_value_type            const_value_type ;
-  typedef typename nested::const_type                  const_type [N] ;
-
-  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [N] ;
-  typedef typename nested::non_const_value_type            non_const_value_type ;
-  typedef typename nested::non_const_type                  non_const_type [N] ;
-};
-
-} // namespace Impl
-} // namespace Kokkos
-
-#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */
-
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
index fd7ea845e7633d7415b0b9cd147f1da51ef93632..beafeaa5b50b82fab6dda7db598dc39e4a969f72 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
@@ -50,8 +50,9 @@ namespace Kokkos {
 // Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
 // Must cast-away 'volatile' for the CAS call.
 
-#if defined( KOKKOS_ATOMICS_USE_CUDA )
+#if defined( KOKKOS_HAVE_CUDA )
 
+#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
 __inline__ __device__
 int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
 { return atomicCAS((int*)dest,compare,val); }
@@ -89,38 +90,44 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
 template < typename T >
 __inline__ __device__
 T atomic_compare_exchange( volatile T * const dest , const T & compare ,
-    typename ::Kokkos::Impl::enable_if<
+    typename Kokkos::Impl::enable_if<
                   ( sizeof(T) != 4 )
                && ( sizeof(T) != 8 )
              , const T >::type& val )
 {
   T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
-  int done = 1;
-  while ( done>0 ) {
-    done++;
-    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
-      return_val = *dest;
-      if( return_val == compare )
-        *dest = val;
-      Impl::unlock_address_cuda_space( (void*) dest );
-      done = 0;
+  int done = 0;
+  unsigned int active = __ballot(1);
+  unsigned int done_active = 0;
+  while (active!=done_active) {
+    if(!done) {
+      if( Impl::lock_address_cuda_space( (void*) dest ) ) {
+        return_val = *dest;
+        if( return_val == compare )
+          *dest = val;
+        Impl::unlock_address_cuda_space( (void*) dest );
+        done = 1;
+      }
     }
+    done_active = __ballot(done);
   }
   return return_val;
 }
+#endif
+#endif
 
 //----------------------------------------------------------------------------
 // GCC native CAS supports int, long, unsigned int, unsigned long.
 // Intel native CAS support int and long with the same interface as GCC.
+#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
+#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
 
-#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
-
-KOKKOS_INLINE_FUNCTION
+inline
 int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
 { return __sync_val_compare_and_swap(dest,compare,val); }
 
-KOKKOS_INLINE_FUNCTION
+inline
 long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
 { return __sync_val_compare_and_swap(dest,compare,val); }
 
@@ -128,11 +135,11 @@ long atomic_compare_exchange( volatile long * const dest, const long compare, co
 
 // GCC supports unsigned
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
 { return __sync_val_compare_and_swap(dest,compare,val); }
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
                                        const unsigned long compare ,
                                        const unsigned long val )
@@ -141,7 +148,7 @@ unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
 #endif
 
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_compare_exchange( volatile T * const dest, const T & compare,
   typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
 {
@@ -163,7 +170,7 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
 }
 
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_compare_exchange( volatile T * const dest, const T & compare,
   typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                     sizeof(T) == sizeof(long) , const T & >::type val )
@@ -187,7 +194,7 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
 
 #if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_compare_exchange( volatile T * const dest, const T & compare,
   typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                     sizeof(T) != sizeof(long) &&
@@ -207,7 +214,7 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
 template < typename T >
 inline
 T atomic_compare_exchange( volatile T * const dest , const T compare ,
-    typename ::Kokkos::Impl::enable_if<
+    typename Kokkos::Impl::enable_if<
                   ( sizeof(T) != 4 )
                && ( sizeof(T) != 8 )
             #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
@@ -254,6 +261,7 @@ T atomic_compare_exchange( volatile T * const dest, const T compare, const T val
   return retval;
 }
 
+#endif
 #endif
 
 template <typename T>
@@ -262,7 +270,6 @@ bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, con
 {
   return compare == atomic_compare_exchange(dest, compare, val);
 }
-
 //----------------------------------------------------------------------------
 
 } // namespace Kokkos
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp
index 1438a37e454e556832549e2137202d971b4a09ce..7fc0e6984bbd2aacdc69dff2f1c6bfeed4493b1a 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp
@@ -44,6 +44,8 @@
 #if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT )
 #define KOKKOS_ATOMIC_DECREMENT
 
+#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
+
 namespace Kokkos {
 
 // Atomic increment
@@ -58,7 +60,7 @@ void atomic_decrement<char>(volatile char* a) {
       : "memory"
     );
 #else
-  Kokkos::atomic_fetch_add(a,-1);
+  Kokkos::atomic_fetch_sub(a, 1);
 #endif
 }
 
@@ -73,7 +75,7 @@ void atomic_decrement<short>(volatile short* a) {
       : "memory"
     );
 #else
-  Kokkos::atomic_fetch_add(a,-1);
+  Kokkos::atomic_fetch_sub(a, 1);
 #endif
 }
 
@@ -88,7 +90,7 @@ void atomic_decrement<int>(volatile int* a) {
       : "memory"
     );
 #else
-  Kokkos::atomic_fetch_add(a,-1);
+  Kokkos::atomic_fetch_sub(a, 1);
 #endif
 }
 
@@ -103,14 +105,14 @@ void atomic_decrement<long long int>(volatile long long int* a) {
       : "memory"
     );
 #else
-  Kokkos::atomic_fetch_add(a,-1);
+  Kokkos::atomic_fetch_sub(a, 1);
 #endif
 }
 
 template<typename T>
 KOKKOS_INLINE_FUNCTION
 void atomic_decrement(volatile T* a) {
-  Kokkos::atomic_fetch_add(a,-1);
+  Kokkos::atomic_fetch_sub(a, 1);
 }
 
 } // End of namespace Kokkos
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
index e8cac4ba3b82ba097016a3ba80b03b010a7df8c3..ae53b81779a21f285e6d5fe528b7f32a3baca212 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
@@ -48,7 +48,8 @@ namespace Kokkos {
 
 //----------------------------------------------------------------------------
 
-#if defined( KOKKOS_ATOMICS_USE_CUDA )
+#if defined( KOKKOS_HAVE_CUDA )
+#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
 
 __inline__ __device__
 int atomic_exchange( volatile int * const dest , const int val )
@@ -99,22 +100,26 @@ T atomic_exchange(
 template < typename T >
 __inline__ __device__
 T atomic_exchange( volatile T * const dest ,
-    typename ::Kokkos::Impl::enable_if<
+    typename Kokkos::Impl::enable_if<
                   ( sizeof(T) != 4 )
                && ( sizeof(T) != 8 )
              , const T >::type& val )
 {
   T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
-  int done = 1;
-  while ( done > 0 ) {
-    done++;
-    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
-      return_val = *dest;
-      *dest = val;
-      Impl::unlock_address_cuda_space( (void*) dest );
-      done = 0;
+  int done = 0;
+  unsigned int active = __ballot(1);
+  unsigned int done_active = 0;
+  while (active!=done_active) {
+    if(!done) {
+      if( Impl::lock_address_cuda_space( (void*) dest ) ) {
+        return_val = *dest;
+        *dest = val;
+        Impl::unlock_address_cuda_space( (void*) dest );
+        done = 1;
+      }
     }
+    done_active = __ballot(done);
   }
   return return_val;
 }
@@ -152,12 +157,16 @@ void atomic_assign(
   (void) atomic_exchange(dest,val);
 }
 
+#endif
+#endif
+
 //----------------------------------------------------------------------------
 
-#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
+#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
+#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
 
 template< typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_exchange( volatile T * const dest ,
   typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
                                   , const T & >::type val )
@@ -172,7 +181,7 @@ T atomic_exchange( volatile T * const dest ,
   union U {
     T val_T ;
     type val_type ;
-    KOKKOS_INLINE_FUNCTION U() {};
+    inline U() {};
   } old ;
 #else
   union { T val_T ; type val_type ; } old ;
@@ -190,7 +199,7 @@ T atomic_exchange( volatile T * const dest ,
 
 #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
 template< typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_exchange( volatile T * const dest ,
   typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
                                   , const T & >::type val )
@@ -198,7 +207,7 @@ T atomic_exchange( volatile T * const dest ,
   union U {
     Impl::cas128_t i ;
     T t ;
-    KOKKOS_INLINE_FUNCTION U() {};
+    inline U() {};
   } assume , oldval , newval ;
 
   oldval.t = *dest ;
@@ -218,7 +227,7 @@ T atomic_exchange( volatile T * const dest ,
 template < typename T >
 inline
 T atomic_exchange( volatile T * const dest ,
-    typename ::Kokkos::Impl::enable_if<
+    typename Kokkos::Impl::enable_if<
                   ( sizeof(T) != 4 )
                && ( sizeof(T) != 8 )
               #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
@@ -247,7 +256,7 @@ T atomic_exchange( volatile T * const dest ,
 }
 
 template< typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 void atomic_assign( volatile T * const dest ,
   typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
                                   , const T & >::type val )
@@ -262,7 +271,7 @@ void atomic_assign( volatile T * const dest ,
   union U {
     T val_T ;
     type val_type ;
-    KOKKOS_INLINE_FUNCTION U() {};
+    inline U() {};
   } old ;
 #else
   union { T val_T ; type val_type ; } old ;
@@ -278,7 +287,7 @@ void atomic_assign( volatile T * const dest ,
 
 #if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
 template< typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 void atomic_assign( volatile T * const dest ,
   typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
                                   , const T & >::type val )
@@ -286,7 +295,7 @@ void atomic_assign( volatile T * const dest ,
   union U {
     Impl::cas128_t i ;
     T t ;
-    KOKKOS_INLINE_FUNCTION U() {};
+    inline U() {};
   } assume , oldval , newval ;
 
   oldval.t = *dest ;
@@ -301,7 +310,7 @@ void atomic_assign( volatile T * const dest ,
 template < typename T >
 inline
 void atomic_assign( volatile T * const dest ,
-    typename ::Kokkos::Impl::enable_if<
+    typename Kokkos::Impl::enable_if<
                   ( sizeof(T) != 4 )
                && ( sizeof(T) != 8 )
               #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
@@ -325,7 +334,7 @@ void atomic_assign( volatile T * const dest ,
 #elif defined( KOKKOS_ATOMICS_USE_OMP31 )
 
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_exchange( volatile T * const dest , const T val )
 {
   T retval;
@@ -339,7 +348,7 @@ T atomic_exchange( volatile T * const dest , const T val )
 }
 
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 void atomic_assign( volatile T * const dest , const T val )
 {
 //#pragma omp atomic
@@ -350,7 +359,7 @@ void atomic_assign( volatile T * const dest , const T val )
 }
 
 #endif
-
+#endif
 } // namespace Kokkos
 
 #endif
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
index 62dfcdd2f88934f8d48b51e0637e9487d92c9a7e..08d2867ab434531a501a6e3b29dca4e2fb63edef 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
@@ -48,7 +48,8 @@ namespace Kokkos {
 
 //----------------------------------------------------------------------------
 
-#if defined( KOKKOS_ATOMICS_USE_CUDA )
+#if defined( KOKKOS_HAVE_CUDA )
+#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
 
 // Support for int, unsigned int, unsigned long long int, and float
 
@@ -69,6 +70,12 @@ __inline__ __device__
 float atomic_fetch_add( volatile float * const dest , const float val )
 { return atomicAdd((float*)dest,val); }
 
+#if ( 600 <= __CUDA_ARCH__ )
+__inline__ __device__
+double atomic_fetch_add( volatile double * const dest , const double val )
+{ return atomicAdd((double*)dest,val); }
+#endif
+
 template < typename T >
 __inline__ __device__
 T atomic_fetch_add( volatile T * const dest ,
@@ -133,31 +140,38 @@ T atomic_fetch_add( volatile T * const dest ,
 template < typename T >
 __inline__ __device__
 T atomic_fetch_add( volatile T * const dest ,
-    typename ::Kokkos::Impl::enable_if<
+    typename Kokkos::Impl::enable_if<
                   ( sizeof(T) != 4 )
                && ( sizeof(T) != 8 )
              , const T >::type& val )
 {
   T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
-  int done = 1;
-  while ( done>0 ) {
-    done++;
-    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
-      return_val = *dest;
-      *dest = return_val + val;
-      Impl::unlock_address_cuda_space( (void*) dest );
-      done = 0;
+  int done = 0;
+  unsigned int active = __ballot(1);
+  unsigned int done_active = 0;
+  while (active!=done_active) {
+    if(!done) {
+      bool locked = Impl::lock_address_cuda_space( (void*) dest );
+      if( locked ) {
+        return_val = *dest;
+        *dest = return_val + val;
+        Impl::unlock_address_cuda_space( (void*) dest );
+        done = 1;
+      }
     }
+    done_active = __ballot(done);
   }
   return return_val;
 }
+#endif
+#endif
 //----------------------------------------------------------------------------
-
-#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
+#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
+#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
 
 #if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
-KOKKOS_INLINE_FUNCTION
+inline
 int atomic_fetch_add( volatile int * dest , const int val )
 {
         int original = val;
@@ -172,29 +186,29 @@ int atomic_fetch_add( volatile int * dest , const int val )
         return original;
 }
 #else
-KOKKOS_INLINE_FUNCTION
+inline
 int atomic_fetch_add( volatile int * const dest , const int val )
 { return __sync_fetch_and_add(dest, val); }
 #endif
 
-KOKKOS_INLINE_FUNCTION
+inline
 long int atomic_fetch_add( volatile long int * const dest , const long int val )
 { return __sync_fetch_and_add(dest,val); }
 
 #if defined( KOKKOS_ATOMICS_USE_GCC )
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
 { return __sync_fetch_and_add(dest,val); }
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val )
 { return __sync_fetch_and_add(dest,val); }
 
 #endif
 
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_fetch_add( volatile T * const dest ,
   typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
 {
@@ -202,7 +216,7 @@ T atomic_fetch_add( volatile T * const dest ,
   union U {
     int i ;
     T t ;
-    KOKKOS_INLINE_FUNCTION U() {};
+    inline U() {};
   } assume , oldval , newval ;
 #else
   union U {
@@ -223,7 +237,7 @@ T atomic_fetch_add( volatile T * const dest ,
 }
 
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_fetch_add( volatile T * const dest ,
   typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                     sizeof(T) == sizeof(long) , const T >::type val )
@@ -232,7 +246,7 @@ T atomic_fetch_add( volatile T * const dest ,
   union U {
     long i ;
     T t ;
-    KOKKOS_INLINE_FUNCTION U() {};
+    inline U() {};
   } assume , oldval , newval ;
 #else
   union U {
@@ -254,7 +268,7 @@ T atomic_fetch_add( volatile T * const dest ,
 
 #if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_fetch_add( volatile T * const dest ,
   typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                     sizeof(T) != sizeof(long) &&
@@ -263,7 +277,7 @@ T atomic_fetch_add( volatile T * const dest ,
   union U {
     Impl::cas128_t i ;
     T t ;
-    KOKKOS_INLINE_FUNCTION U() {};
+    inline U() {};
   } assume , oldval , newval ;
 
   oldval.t = *dest ;
@@ -283,7 +297,7 @@ T atomic_fetch_add( volatile T * const dest ,
 template < typename T >
 inline
 T atomic_fetch_add( volatile T * const dest ,
-    typename ::Kokkos::Impl::enable_if<
+    typename Kokkos::Impl::enable_if<
                   ( sizeof(T) != 4 )
                && ( sizeof(T) != 8 )
               #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
@@ -325,7 +339,7 @@ T atomic_fetch_add( volatile T * const dest , const T val )
 }
 
 #endif
-
+#endif
 //----------------------------------------------------------------------------
 
 // Simpler version of atomic_fetch_add without the fetch
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp
index 9b7ebae4ac6df12bae659e50aa7da34429ac3187..121a5d51928517981f711c369ae3125ac48e2ade 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp
@@ -48,7 +48,8 @@ namespace Kokkos {
 
 //----------------------------------------------------------------------------
 
-#if defined( KOKKOS_ATOMICS_USE_CUDA )
+#if defined( KOKKOS_HAVE_CUDA )
+#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
 
 // Support for int, unsigned int, unsigned long long int, and float
 
@@ -66,26 +67,27 @@ unsigned long long int atomic_fetch_and( volatile unsigned long long int * const
                                          const unsigned long long int val )
 { return atomicAnd((unsigned long long int*)dest,val); }
 #endif
-
+#endif
+#endif
 //----------------------------------------------------------------------------
+#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
+#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
 
-#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
-
-KOKKOS_INLINE_FUNCTION
+inline
 int atomic_fetch_and( volatile int * const dest , const int val )
 { return __sync_fetch_and_and(dest,val); }
 
-KOKKOS_INLINE_FUNCTION
+inline
 long int atomic_fetch_and( volatile long int * const dest , const long int val )
 { return __sync_fetch_and_and(dest,val); }
 
 #if defined( KOKKOS_ATOMICS_USE_GCC )
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
 { return __sync_fetch_and_and(dest,val); }
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val )
 { return __sync_fetch_and_and(dest,val); }
 
@@ -108,7 +110,7 @@ T atomic_fetch_and( volatile T * const dest , const T val )
 }
 
 #endif
-
+#endif
 //----------------------------------------------------------------------------
 
 // Simpler version of atomic_fetch_and without the fetch
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp
index f15e61a3aea2ac2e7120d88a7151390cc2bf0b73..2c89f56705a51f86f686d5ce6b8cad2c52898bdf 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp
@@ -48,7 +48,8 @@ namespace Kokkos {
 
 //----------------------------------------------------------------------------
 
-#if defined( KOKKOS_ATOMICS_USE_CUDA )
+#if defined( KOKKOS_HAVE_CUDA )
+#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
 
 // Support for int, unsigned int, unsigned long long int, and float
 
@@ -66,26 +67,27 @@ unsigned long long int atomic_fetch_or( volatile unsigned long long int * const
                                          const unsigned long long int val )
 { return atomicOr((unsigned long long int*)dest,val); }
 #endif
-
+#endif
+#endif
 //----------------------------------------------------------------------------
+#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
+#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
 
-#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
-
-KOKKOS_INLINE_FUNCTION
+inline
 int atomic_fetch_or( volatile int * const dest , const int val )
 { return __sync_fetch_and_or(dest,val); }
 
-KOKKOS_INLINE_FUNCTION
+inline
 long int atomic_fetch_or( volatile long int * const dest , const long int val )
 { return __sync_fetch_and_or(dest,val); }
 
 #if defined( KOKKOS_ATOMICS_USE_GCC )
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
 { return __sync_fetch_and_or(dest,val); }
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val )
 { return __sync_fetch_and_or(dest,val); }
 
@@ -108,7 +110,7 @@ T atomic_fetch_or( volatile T * const dest , const T val )
 }
 
 #endif
-
+#endif
 //----------------------------------------------------------------------------
 
 // Simpler version of atomic_fetch_or without the fetch
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
index a3a57aa81c7f303cf74fe5d8d7c6a50dc36eeb2d..b51d2fe7828704e88923382407bb5a4521030bf3 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
@@ -48,7 +48,8 @@ namespace Kokkos {
 
 //----------------------------------------------------------------------------
 
-#if defined( KOKKOS_ATOMICS_USE_CUDA )
+#if defined( KOKKOS_HAVE_CUDA )
+#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
 
 // Support for int, unsigned int, unsigned long long int, and float
 
@@ -103,7 +104,7 @@ T atomic_fetch_sub( volatile T * const dest ,
 template < typename T >
 __inline__ __device__
 T atomic_fetch_sub( volatile T * const dest ,
-    typename ::Kokkos::Impl::enable_if<
+    typename Kokkos::Impl::enable_if<
                   ( sizeof(T) != 4 )
                && ( sizeof(T) != 8 )
              , const T >::type& val )
@@ -111,44 +112,49 @@ T atomic_fetch_sub( volatile T * const dest ,
   T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
   int done = 0;
-  while ( done>0 ) {
-    done++;
-    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
-      return_val = *dest;
-      *dest = return_val - val;
-      Impl::unlock_address_cuda_space( (void*) dest );
-      done = 0;
+  unsigned int active = __ballot(1);
+  unsigned int done_active = 0;
+  while (active!=done_active) {
+    if(!done) {
+      if( Impl::lock_address_cuda_space( (void*) dest ) ) {
+        return_val = *dest;
+        *dest = return_val - val;
+        Impl::unlock_address_cuda_space( (void*) dest );
+        done = 1;
+      }
     }
+    done_active = __ballot(done);
   }
   return return_val;
 }
-
+#endif
+#endif
 //----------------------------------------------------------------------------
+#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
+#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
 
-#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
-
-KOKKOS_INLINE_FUNCTION
+inline
 int atomic_fetch_sub( volatile int * const dest , const int val )
 { return __sync_fetch_and_sub(dest,val); }
 
-KOKKOS_INLINE_FUNCTION
+inline
 long int atomic_fetch_sub( volatile long int * const dest , const long int val )
 { return __sync_fetch_and_sub(dest,val); }
 
 #if defined( KOKKOS_ATOMICS_USE_GCC )
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
 { return __sync_fetch_and_sub(dest,val); }
 
-KOKKOS_INLINE_FUNCTION
+inline
 unsigned long int atomic_fetch_sub( volatile unsigned long int * const dest , const unsigned long int val )
 { return __sync_fetch_and_sub(dest,val); }
 
 #endif
 
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_fetch_sub( volatile T * const dest ,
   typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
 {
@@ -166,7 +172,7 @@ T atomic_fetch_sub( volatile T * const dest ,
 }
 
 template < typename T >
-KOKKOS_INLINE_FUNCTION
+inline
 T atomic_fetch_sub( volatile T * const dest ,
   typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                     sizeof(T) == sizeof(long) , const T >::type val )
@@ -190,7 +196,7 @@ T atomic_fetch_sub( volatile T * const dest ,
 template < typename T >
 inline
 T atomic_fetch_sub( volatile T * const dest ,
-    typename ::Kokkos::Impl::enable_if<
+    typename Kokkos::Impl::enable_if<
                   ( sizeof(T) != 4 )
                && ( sizeof(T) != 8 )
              , const T >::type& val )
@@ -219,7 +225,7 @@ T atomic_fetch_sub( volatile T * const dest , const T val )
 }
 
 #endif
-
+#endif
 // Simpler version of atomic_fetch_sub without the fetch
 template <typename T>
 KOKKOS_INLINE_FUNCTION
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
index 343e9bf4c48fa499199930ebbf9a1fb893e475da..527e1bb4e334e7a9e83a0021061f07fd5900db18 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
@@ -147,7 +147,7 @@ struct RShiftOper {
 template < class Oper, typename T >
 KOKKOS_INLINE_FUNCTION
 T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
-  typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
+  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                     sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
 {
   union { unsigned long long int i ; T t ; } oldval , assume , newval ;
@@ -157,7 +157,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
   do {
     assume.i = oldval.i ;
     newval.t = Oper::apply(assume.t, val) ;
-    oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
+    oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
   } while ( assume.i != oldval.i );
 
   return oldval.t ;
@@ -166,7 +166,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
 template < class Oper, typename T >
 KOKKOS_INLINE_FUNCTION
 T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
-  typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
+  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                     sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
 {
   union { unsigned long long int i ; T t ; } oldval , assume , newval ;
@@ -176,7 +176,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
   do {
     assume.i = oldval.i ;
     newval.t = Oper::apply(assume.t, val) ;
-    oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
+    oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
   } while ( assume.i != oldval.i );
 
   return newval.t ;
@@ -185,7 +185,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
 template < class Oper, typename T >
 KOKKOS_INLINE_FUNCTION
 T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
-  typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
+  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
 {
   union { int i ; T t ; } oldval , assume , newval ;
 
@@ -194,7 +194,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
   do {
     assume.i = oldval.i ;
     newval.t = Oper::apply(assume.t, val) ;
-    oldval.i = ::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
+    oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
   } while ( assume.i != oldval.i );
 
   return oldval.t ;
@@ -203,7 +203,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
 template < class Oper, typename T >
 KOKKOS_INLINE_FUNCTION
 T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
-  typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
+  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
 {
   union { int i ; T t ; } oldval , assume , newval ;
 
@@ -212,7 +212,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
   do {
     assume.i = oldval.i ;
     newval.t = Oper::apply(assume.t, val) ;
-    oldval.i = ::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
+    oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
   } while ( assume.i != oldval.i );
 
   return newval.t ;
@@ -221,7 +221,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
 template < class Oper, typename T >
 KOKKOS_INLINE_FUNCTION
 T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
-  typename ::Kokkos::Impl::enable_if<
+  typename Kokkos::Impl::enable_if<
                 ( sizeof(T) != 4 )
              && ( sizeof(T) != 8 )
           #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
@@ -238,15 +238,20 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
   return return_val;
 #else
   // This is a way to (hopefully) avoid dead lock in a warp
-  int done = 1;
-  while ( done>0 ) {
-    done++;
-    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
-      T return_val = *dest;
-      *dest = Oper::apply(return_val, val);;
-      Impl::unlock_address_cuda_space( (void*) dest );
-      done=0;
+  T return_val;
+  int done = 0;
+  unsigned int active = __ballot(1);
+  unsigned int done_active = 0;
+  while (active!=done_active) {
+    if(!done) {
+      if( Impl::lock_address_cuda_space( (void*) dest ) ) {
+        return_val = *dest;
+        *dest = Oper::apply(return_val, val);;
+        Impl::unlock_address_cuda_space( (void*) dest );
+        done=1;
+      }
     }
+    done_active = __ballot(done);
   }
   return return_val;
 #endif
@@ -255,7 +260,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
 template < class Oper, typename T >
 KOKKOS_INLINE_FUNCTION
 T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
-  typename ::Kokkos::Impl::enable_if<
+  typename Kokkos::Impl::enable_if<
                 ( sizeof(T) != 4 )
              && ( sizeof(T) != 8 )
           #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
@@ -271,16 +276,21 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
   Impl::unlock_address_host_space( (void*) dest );
   return return_val;
 #else
+  T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
-  int done = 1;
-  while ( done>0 ) {
-    done++;
-    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
-      T return_val = Oper::apply(*dest, val);
-      *dest = return_val;
-      Impl::unlock_address_cuda_space( (void*) dest );
-      done=0;
+  int done = 0;
+  unsigned int active = __ballot(1);
+  unsigned int done_active = 0;
+  while (active!=done_active) {
+    if(!done) {
+      if( Impl::lock_address_cuda_space( (void*) dest ) ) {
+        return_val = Oper::apply(*dest, val);
+        *dest = return_val;
+        Impl::unlock_address_cuda_space( (void*) dest );
+        done=1;
+      }
     }
+    done_active = __ballot(done);
   }
   return return_val;
 #endif
diff --git a/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp
index b9d23bd815433a0a91c282dd6e787b7d16f8b0e3..8ee094675cb861f9daf2c8b054b6dbf7517b401d 100644
--- a/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp
@@ -90,10 +90,10 @@ GetSystemInfo(&info);
 int mpi_ranks_per_node() {
   char *str;
   int ppn = 1;
-  if ((str = getenv("SLURM_TASKS_PER_NODE"))) {
-    ppn = atoi(str);
-    if(ppn<=0) ppn = 1;
-  }
+  //if ((str = getenv("SLURM_TASKS_PER_NODE"))) {
+  //  ppn = atoi(str);
+  //  if(ppn<=0) ppn = 1;
+  //}
   if ((str = getenv("MV2_COMM_WORLD_LOCAL_SIZE"))) {
     ppn = atoi(str);
     if(ppn<=0) ppn = 1;
@@ -108,9 +108,9 @@ int mpi_ranks_per_node() {
 int mpi_local_rank_on_node() {
   char *str;
   int local_rank=0;
-  if ((str = getenv("SLURM_LOCALID"))) {
-    local_rank = atoi(str);
-  }
+  //if ((str = getenv("SLURM_LOCALID"))) {
+  //  local_rank = atoi(str);
+  //}
   if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
     local_rank = atoi(str);
   }
diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp
index 567a2141405719e3331b2327ca40097c24af775a..de1085986848232f0510548bfa4375d63855f652 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp
@@ -84,8 +84,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
 #endif // defined( KOKKOS_HAVE_CUDA )
 
 #if defined( KOKKOS_HAVE_OPENMP )
-  if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
-      Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
+  if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
+      std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
     if(num_threads>0) {
       if(use_numa>0) {
         Kokkos::OpenMP::initialize(num_threads,use_numa);
@@ -104,8 +104,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
 #endif
 
 #if defined( KOKKOS_HAVE_PTHREAD )
-  if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
-      Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
+  if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
+      std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
     if(num_threads>0) {
       if(use_numa>0) {
         Kokkos::Threads::initialize(num_threads,use_numa);
@@ -129,14 +129,14 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
   // struct, you may remove this line of code.
   (void) args;
 
-  if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
-      Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
+  if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
+      std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
     Kokkos::Serial::initialize();
   }
 #endif
 
 #if defined( KOKKOS_HAVE_CUDA )
-  if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
+  if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
     if (use_gpu > -1) {
       Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
     }
@@ -155,16 +155,20 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
 void finalize_internal( const bool all_spaces = false )
 {
 
+#if (KOKKOS_ENABLE_PROFILING)
+    Kokkos::Profiling::finalize();
+#endif
+
 #if defined( KOKKOS_HAVE_CUDA )
-  if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
+  if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
     if(Kokkos::Cuda::is_initialized())
       Kokkos::Cuda::finalize();
   }
 #endif
 
 #if defined( KOKKOS_HAVE_OPENMP )
-  if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
-      Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
+  if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
+      std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
       all_spaces ) {
     if(Kokkos::OpenMP::is_initialized())
       Kokkos::OpenMP::finalize();
@@ -172,8 +176,8 @@ void finalize_internal( const bool all_spaces = false )
 #endif
 
 #if defined( KOKKOS_HAVE_PTHREAD )
-  if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
-      Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
+  if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
+      std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
       all_spaces ) {
     if(Kokkos::Threads::is_initialized())
       Kokkos::Threads::finalize();
@@ -181,46 +185,41 @@ void finalize_internal( const bool all_spaces = false )
 #endif
 
 #if defined( KOKKOS_HAVE_SERIAL )
-  if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
-      Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
+  if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
+      std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
       all_spaces ) {
     if(Kokkos::Serial::is_initialized())
       Kokkos::Serial::finalize();
   }
 #endif
-
-#if (KOKKOS_ENABLE_PROFILING)
-    Kokkos::Profiling::finalize();
-#endif
-
 }
 
 void fence_internal()
 {
 
 #if defined( KOKKOS_HAVE_CUDA )
-  if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
+  if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
     Kokkos::Cuda::fence();
   }
 #endif
 
 #if defined( KOKKOS_HAVE_OPENMP )
-  if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
-      Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
+  if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
+      std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
     Kokkos::OpenMP::fence();
   }
 #endif
 
 #if defined( KOKKOS_HAVE_PTHREAD )
-  if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
-      Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
+  if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
+      std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
     Kokkos::Threads::fence();
   }
 #endif
 
 #if defined( KOKKOS_HAVE_SERIAL )
-  if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
-      Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
+  if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
+      std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
     Kokkos::Serial::fence();
   }
 #endif
@@ -350,11 +349,11 @@ void initialize(int& narg, char* arg[])
 
         if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) {
           char *str;
-          if ((str = getenv("SLURM_LOCALID"))) {
-            int local_rank = atoi(str);
-            device = local_rank % ndevices;
-            if (device >= skip_device) device++;
-          }
+          //if ((str = getenv("SLURM_LOCALID"))) {
+          //  int local_rank = atoi(str);
+          //  device = local_rank % ndevices;
+          //  if (device >= skip_device) device++;
+          //}
           if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
             int local_rank = atoi(str);
             device = local_rank % ndevices;
diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.hpp b/lib/kokkos/core/src/impl/Kokkos_Error.hpp
index 5f88d662069bcb6313c803073385736e23a93456..5fab5eb9a7bd07c9868214607931d03e6ff770b0 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Error.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Error.hpp
@@ -46,7 +46,7 @@
 
 #include <string>
 #include <iosfwd>
-#include <KokkosCore_config.h>
+#include <Kokkos_Macros.hpp>
 #ifdef KOKKOS_HAVE_CUDA
 #include <Cuda/Kokkos_Cuda_abort.hpp>
 #endif
@@ -68,12 +68,18 @@ std::string human_memory_size(size_t arg_bytes);
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+
 namespace Kokkos {
-inline
-void abort( const char * const message ) { Kokkos::Impl::host_abort(message); }
+KOKKOS_INLINE_FUNCTION
+void abort( const char * const message ) { // terminate with `message`; the backend is picked per compilation pass
+#ifdef __CUDA_ARCH__
+  Kokkos::Impl::cuda_abort(message);  // taken when __CUDA_ARCH__ is defined (CUDA device-code pass)
+#else
+  Kokkos::Impl::host_abort(message);  // taken for every other (host) compilation
+#endif
+}
+
 }
-#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp
index 78b6794491a77b78c1025b10fbe3d214fdc71fdb..66c3157c3aba4f6ae4b187d859790986c2458316 100644
--- a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp
@@ -129,14 +129,14 @@ struct FunctorValueTraits< FunctorType , ArgTag , true /* == exists FunctorType:
   // Number of values if single value
   template< class F >
   KOKKOS_FORCEINLINE_FUNCTION static
-  typename Impl::enable_if< Impl::is_same<F,FunctorType>::value && StaticValueSize , unsigned >::type
+  typename Impl::enable_if< std::is_same<F,FunctorType>::value && StaticValueSize , unsigned >::type
     value_count( const F & ) { return 1 ; }
 
   // Number of values if an array, protect via templating because 'f.value_count'
   // will only exist when the functor declares the value_type to be an array.
   template< class F >
   KOKKOS_FORCEINLINE_FUNCTION static
-  typename Impl::enable_if< Impl::is_same<F,FunctorType>::value && ! StaticValueSize , unsigned >::type
+  typename Impl::enable_if< std::is_same<F,FunctorType>::value && ! StaticValueSize , unsigned >::type
     value_count( const F & f ) { return f.value_count ; }
 
   // Total size of the value
@@ -157,7 +157,7 @@ private:
   struct REJECTTAG {}; // Reject tagged operator() when using non-tagged execution policy.
 
   typedef typename
-    Impl::if_c< Impl::is_same< ArgTag , void >::value , VOIDTAG , ArgTag >::type tag_type ;
+    Impl::if_c< std::is_same< ArgTag , void >::value , VOIDTAG , ArgTag >::type tag_type ;
 
   //----------------------------------------
   // parallel_for operator without a tag:
@@ -339,8 +339,8 @@ private:
 
   typedef decltype( deduce_reduce_type( tag_type() , & FunctorType::operator() ) ) ValueType ;
 
-  enum { IS_VOID   = Impl::is_same<VOIDTAG  ,ValueType>::value };
-  enum { IS_REJECT = Impl::is_same<REJECTTAG,ValueType>::value };
+  enum { IS_VOID   = std::is_same<VOIDTAG  ,ValueType>::value };
+  enum { IS_REJECT = std::is_same<REJECTTAG,ValueType>::value };
 
 public:
 
diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
index 11cc120212b25804df0afb9f660ff8b165e0f217..95340261122ff51361bb45da62e1f236c1aef78d 100644
--- a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
@@ -62,6 +62,10 @@
 #include <memkind.h>
 #endif
 
+#if (KOKKOS_ENABLE_PROFILING)
+#include <impl/Kokkos_Profiling_Interface.hpp>
+#endif
+
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 #ifdef KOKKOS_HAVE_HBWSPACE
@@ -219,6 +223,10 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s
   }
 }
 
+constexpr const char* HBWSpace::name() { // accessor for the space's name string (m_name)
+  return m_name; // NOTE(review): constexpr implies inline, yet this body lives in a .cpp — confirm no other translation unit needs to call it
+}
+
 } // namespace Experimental
 } // namespace Kokkos
 
@@ -226,7 +234,6 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 SharedAllocationRecord< void , void >
@@ -242,6 +249,14 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec )
 SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
 ~SharedAllocationRecord()
 {
+  #if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::deallocateData(
+      Kokkos::Profiling::SpaceHandle(Kokkos::Experimental::HBWSpace::name()),RecordBase::m_alloc_ptr->m_label,
+      data(),size());
+  }
+  #endif
+
   m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
                     , SharedAllocationRecord< void , void >::m_alloc_size
                     );
@@ -263,6 +278,12 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
       )
   , m_space( arg_space )
 {
+  #if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
+  }
+  #endif
+
   // Fill in the Header information
   RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
 
@@ -306,7 +327,7 @@ reallocate_tracked( void * const arg_alloc_ptr
   SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr );
   SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size );
 
-  Kokkos::Impl::DeepCopy<HBWSpace,HBWSpace>( r_new->data() , r_old->data()
+  Kokkos::Impl::DeepCopy<Kokkos::Experimental::HBWSpace,Kokkos::Experimental::HBWSpace>( r_new->data() , r_old->data()
                                              , std::min( r_old->size() , r_new->size() ) );
 
   RecordBase::increment( r_new );
@@ -325,7 +346,7 @@ SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record( voi
   RecordHost                   * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;
 
   if ( ! alloc_ptr || record->m_alloc_ptr != head ) {
-    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) );
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) );
   }
 
   return record ;
@@ -339,7 +360,6 @@ print_records( std::ostream & s , const Kokkos::Experimental::HBWSpace & space ,
 }
 
 } // namespace Impl
-} // namespace Experimental
 } // namespace Kokkos
 
 /*--------------------------------------------------------------------------*/
diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
index b52f4591ef0b8c0b71445f6e33b4d913822e5446..bfd13572b95208c9c8397728a460f79ae42465dd 100644
--- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
@@ -43,7 +43,9 @@
 
 #include <algorithm>
 #include <Kokkos_Macros.hpp>
-
+#if (KOKKOS_ENABLE_PROFILING)
+#include <impl/Kokkos_Profiling_Interface.hpp>
+#endif
 /*--------------------------------------------------------------------------*/
 
 #if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
@@ -333,13 +335,15 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_
   }
 }
 
+constexpr const char* HostSpace::name() { // accessor for the space's name string (m_name)
+  return m_name; // NOTE(review): constexpr implies inline, yet this body lives in a .cpp — confirm no other translation unit needs to call it
+}
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 SharedAllocationRecord< void , void >
@@ -355,6 +359,14 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec )
 SharedAllocationRecord< Kokkos::HostSpace , void >::
 ~SharedAllocationRecord()
 {
+  #if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::deallocateData(
+      Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()),RecordBase::m_alloc_ptr->m_label,
+      data(),size());
+  }
+  #endif
+
   m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
                     , SharedAllocationRecord< void , void >::m_alloc_size
                     );
@@ -376,6 +388,11 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space
       )
   , m_space( arg_space )
 {
+#if (KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
+   }
+#endif
   // Fill in the Header information
   RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
 
@@ -438,7 +455,7 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr
   RecordHost                   * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;
 
   if ( ! alloc_ptr || record->m_alloc_ptr != head ) {
-    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) );
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) );
   }
 
   return record ;
@@ -452,55 +469,6 @@ print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail
 }
 
 } // namespace Impl
-} // namespace Experimental
-} // namespace Kokkos
-
-/*--------------------------------------------------------------------------*/
-/*--------------------------------------------------------------------------*/
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-template< class >
-struct ViewOperatorBoundsErrorAbort ;
-
-template<>
-struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > {
- static void apply( const size_t rank
-                  , const size_t n0 , const size_t n1
-                  , const size_t n2 , const size_t n3
-                  , const size_t n4 , const size_t n5
-                  , const size_t n6 , const size_t n7
-                  , const size_t i0 , const size_t i1
-                  , const size_t i2 , const size_t i3
-                  , const size_t i4 , const size_t i5
-                  , const size_t i6 , const size_t i7 );
-};
-
-void ViewOperatorBoundsErrorAbort< Kokkos::HostSpace >::
-apply( const size_t rank
-     , const size_t n0 , const size_t n1
-     , const size_t n2 , const size_t n3
-     , const size_t n4 , const size_t n5
-     , const size_t n6 , const size_t n7
-     , const size_t i0 , const size_t i1
-     , const size_t i2 , const size_t i3
-     , const size_t i4 , const size_t i5
-     , const size_t i6 , const size_t i7 )
-{
-  char buffer[512];
-
-  snprintf( buffer , sizeof(buffer)
-          , "View operator bounds error : rank(%lu) dim(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu) index(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu)"
-          , rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7
-                 , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
-
-  Kokkos::Impl::throw_runtime_exception( buffer );
-}
-
-} // namespace Impl
-} // namespace Experimental
 } // namespace Kokkos
 
 /*--------------------------------------------------------------------------*/
diff --git a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
index eb3da7501ebeeda048e0e8c78e81f20fb60060fa..5155c66df9b77fa667996cf191d862e6b05d8b52 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
@@ -50,7 +50,7 @@ namespace Kokkos {
 KOKKOS_FORCEINLINE_FUNCTION
 void memory_fence()
 {
-#if defined( KOKKOS_ATOMICS_USE_CUDA )
+#if defined( __CUDA_ARCH__ )
   __threadfence();
 #elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
       ( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp
index 91faed170abbeb6d552b6247c74afdaa1596e038..99c5df4db31001b42f56337938f5a7ea73941157 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp
@@ -48,6 +48,11 @@
 
 namespace Kokkos {
   namespace Profiling {
+
+    SpaceHandle::SpaceHandle(const char* space_name) { // copy space_name into the fixed-size `name` buffer
+      strncpy(name,space_name,sizeof(name)-1); name[sizeof(name)-1] = '\0'; // strncpy does not terminate when the source fills the buffer, so terminate explicitly
+    }
+
     bool profileLibraryLoaded() {
        	return (NULL != initProfileLibrary);
     }
@@ -94,6 +99,33 @@ namespace Kokkos {
         }
     }
     
+
+    void pushRegion(const std::string& kName) {
+      if( NULL == pushRegionCallee ) return; // no push hook resolved: nothing to report
+      // Fence first, then hand the region name to the tool's push hook.
+      Kokkos::fence();
+      pushRegionCallee(kName.c_str());
+    }
+
+    void popRegion() {
+      if( NULL == popRegionCallee ) return; // no pop hook resolved: nothing to report
+      // Fence first, then tell the tool the current region has ended.
+      Kokkos::fence();
+      popRegionCallee();
+    }
+
+    void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) {
+        // Forward an allocation event (space, label, pointer, size) to the tool hook, if one was resolved.
+        if(NULL == allocateDataCallee) return;
+        allocateDataCallee(space,label.c_str(),ptr,size);
+    }
+
+    void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) {
+        // Forward a deallocation event to the tool hook; guard on the pointer that is actually called.
+        if(NULL == deallocateDataCallee) return;
+        (*deallocateDataCallee)(space,label.c_str(),ptr,size);
+    }
+
     void initialize() {
 
         // Make sure initialize calls happens only once
@@ -145,6 +177,17 @@ namespace Kokkos {
                 initProfileLibrary = *((initFunction*) &p7);
                 auto p8 = dlsym(firstProfileLibrary, "kokkosp_finalize_library");
                 finalizeProfileLibrary = *((finalizeFunction*) &p8);
+
+                auto p9 = dlsym(firstProfileLibrary, "kokkosp_push_profile_region");
+                pushRegionCallee = *((pushFunction*) &p9);
+                auto p10 = dlsym(firstProfileLibrary, "kokkosp_pop_profile_region");
+                popRegionCallee = *((popFunction*) &p10);
+
+                auto p11 = dlsym(firstProfileLibrary, "kokkosp_allocate_data");
+                allocateDataCallee = *((allocateDataFunction*) &p11);
+                auto p12 = dlsym(firstProfileLibrary, "kokkosp_deallocate_data");
+                deallocateDataCallee = *((deallocateDataFunction*) &p12);
+
             }
         }
 
@@ -170,14 +213,22 @@ namespace Kokkos {
         // Set all profile hooks to NULL to prevent
         // any additional calls. Once we are told to
         // finalize, we mean it
+        initProfileLibrary = NULL;
+        finalizeProfileLibrary = NULL;
+
         beginForCallee = NULL;
         beginScanCallee = NULL;
         beginReduceCallee = NULL;
         endScanCallee = NULL;
         endForCallee = NULL;
         endReduceCallee = NULL;
-        initProfileLibrary = NULL;
-        finalizeProfileLibrary = NULL;
+
+        pushRegionCallee = NULL;
+        popRegionCallee = NULL;
+
+        allocateDataCallee = NULL;
+        deallocateDataCallee = NULL;
+
       }
     }
   }
diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
index 4f01256335cd82962d1744a9895374c170a5cb8b..3d6a3892524ee3234a33f14cf7727cac5512e455 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
@@ -48,6 +48,7 @@
 #include <Kokkos_Core_fwd.hpp>
 #include <Kokkos_Macros.hpp>
 #include <string>
+#include <cinttypes>
 
 #if (KOKKOS_ENABLE_PROFILING)
 #include <impl/Kokkos_Profiling_DeviceInfo.hpp>
@@ -62,6 +63,11 @@
 namespace Kokkos {
   namespace Profiling {
 
+    struct SpaceHandle { // carries a memory-space name by value to the allocate/deallocate data hooks
+      SpaceHandle(const char* space_name); // copies space_name into `name`
+      char name[64]; // fixed 64-byte storage for the space name
+    };
+
     typedef void (*initFunction)(const int,
 	const uint64_t,
 	const uint32_t,
@@ -70,8 +76,16 @@ namespace Kokkos {
     typedef void (*beginFunction)(const char*, const uint32_t, uint64_t*);
     typedef void (*endFunction)(uint64_t);
 
+    typedef void (*pushFunction)(const char*);
+    typedef void (*popFunction)();
+
+    typedef void (*allocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t);
+    typedef void (*deallocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t);
+
+
     static initFunction initProfileLibrary = NULL;
     static finalizeFunction finalizeProfileLibrary = NULL;
+
     static beginFunction beginForCallee = NULL;
     static beginFunction beginScanCallee = NULL;
     static beginFunction beginReduceCallee = NULL;
@@ -79,6 +93,13 @@ namespace Kokkos {
     static endFunction endScanCallee = NULL;
     static endFunction endReduceCallee = NULL;
 
+    static pushFunction pushRegionCallee = NULL;
+    static popFunction popRegionCallee = NULL;
+
+    static allocateDataFunction allocateDataCallee = NULL;
+    static deallocateDataFunction deallocateDataCallee = NULL;
+
+
     bool profileLibraryLoaded();
 
     void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
@@ -88,6 +109,12 @@ namespace Kokkos {
     void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
     void endParallelReduce(const uint64_t kernelID);
 
+    void pushRegion(const std::string& kName);
+    void popRegion();
+
+    void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size);
+    void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size);
+
     void initialize();
     void finalize();
 
@@ -105,8 +132,14 @@ namespace Kokkos {
         endScanCallee = NULL;
         endForCallee = NULL;
         endReduceCallee = NULL;
+
+        allocateDataCallee = NULL;
+        deallocateDataCallee = NULL;
+
         initProfileLibrary = NULL;
         finalizeProfileLibrary = NULL;
+        pushRegionCallee = NULL;
+        popRegionCallee = NULL;
       }
     }
 
diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
index e8bdbde6c60f182f588617dda2a9c2f32530694c..eb881545d2270d9cfa4b3e06c8a3a262e3c4fd7a 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
@@ -43,8 +43,9 @@
 
 #include <Kokkos_Core.hpp>
 
-#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
 
+#include <impl/Kokkos_Serial_Task.hpp>
 #include <impl/Kokkos_TaskQueue_impl.hpp>
 
 //----------------------------------------------------------------------------
@@ -143,5 +144,5 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
 
 }} /* namespace Kokkos::Impl */
 
-#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
 
diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp
index 48a110c5f1583cd4943a011f3d33bd25e3cd00f2..473b7aadb2e29984b880ff1868b11b6461ed9df9 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
@@ -44,7 +44,9 @@
 #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP
 #define KOKKOS_IMPL_SERIAL_TASK_HPP
 
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_ENABLE_TASKDAG )
+
+#include <impl/Kokkos_TaskQueue.hpp>
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
@@ -128,47 +130,63 @@ struct TeamThreadRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > >
     {}
 };
 
+//----------------------------------------------------------------------------
+
+template<typename iType>
+struct ThreadVectorRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > > // specialization for the Serial TaskExec
+{
+  typedef iType index_type;
+  const iType start ;   // lower bound; the constructor below always sets it to 0
+  const iType end ;     // upper bound, copied from arg_count
+  enum {increment = 1}; // step between consecutive indices
+  TaskExec< Kokkos::Serial > & thread; // task execution context, held by reference
+
+  KOKKOS_INLINE_FUNCTION
+  ThreadVectorRangeBoundariesStruct
+    ( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count) // builds the range 0 .. arg_count
+    : start( 0 )
+    , end(arg_count)
+    , thread(arg_thread)
+    {}
+};
+
 }} /* namespace Kokkos::Impl */
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-/*
-template<typename iType>
-KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
-TeamThreadRange( const Impl::TaskExec< Kokkos::Serial > & thread
-               , const iType & count )
-{
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
-}
-*/
-//TODO const issue omp
-template<typename iType>
+
+// OMP version needs non-const TaskExec
+template< typename iType >
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
-TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread
-               , const iType & count )
+Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >
+TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType & count )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >( thread, count );
 }
-/*
-template<typename iType>
+
+// OMP version needs non-const TaskExec
+template< typename iType1, typename iType2 >
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >
-TeamThreadRange( const Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end )
+Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
+                                       Impl::TaskExec< Kokkos::Serial > >
+TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType1 & start, const iType2 & end )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end);
+  typedef typename std::common_type< iType1, iType2 >::type iType;
+  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >(
+           thread, iType(start), iType(end) );
 }
-*/
-//TODO const issue omp
+
+// OMP version needs non-const TaskExec
 template<typename iType>
 KOKKOS_INLINE_FUNCTION
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >
-TeamThreadRange( Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end )
+Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
+ThreadVectorRange
+  ( Impl::TaskExec< Kokkos::Serial > & thread
+  , const iType & count )
 {
-  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end);
+  return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
 }
 
   /** \brief  Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
@@ -177,7 +195,7 @@ TeamThreadRange( Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start
    * This functionality requires C++11 support.*/
 template<typename iType, class Lambda>
 KOKKOS_INLINE_FUNCTION
-void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) {
+void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) {
   for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
     lambda(i);
 }
@@ -213,7 +231,7 @@ void parallel_reduce
 
   initialized_result = result;
 }
-// placeholder for future function
+
 template< typename iType, class Lambda, typename ValueType >
 KOKKOS_INLINE_FUNCTION
 void parallel_reduce
@@ -221,8 +239,17 @@ void parallel_reduce
    const Lambda & lambda,
    ValueType& initialized_result)
 {
+  initialized_result = ValueType();
+#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+#pragma ivdep
+#endif
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    ValueType tmp = ValueType();
+    lambda(i,tmp);
+    initialized_result+=tmp;
+  }
 }
-// placeholder for future function
+
 template< typename iType, class Lambda, typename ValueType, class JoinType >
 KOKKOS_INLINE_FUNCTION
 void parallel_reduce
@@ -231,6 +258,16 @@ void parallel_reduce
    const JoinType & join,
    ValueType& initialized_result)
 {
+  ValueType result = initialized_result;
+#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+#pragma ivdep
+#endif
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    ValueType tmp = ValueType();
+    lambda(i,tmp);
+    join(result,tmp);
+  }
+  initialized_result = result;
 }
 
 template< typename ValueType, typename iType, class Lambda >
@@ -266,6 +303,6 @@ void parallel_scan
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
 #endif /* #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP */
 
diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
deleted file mode 100644
index 1577df07cd74f2634f9f98cc94d3825062ad3ff6..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-// Experimental unified task-data parallel manycore LDRD
-
-#include <impl/Kokkos_Serial_TaskPolicy.hpp>
-
-#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY )
-
-#include <stdlib.h>
-#include <stdexcept>
-#include <iostream>
-#include <sstream>
-#include <string>
-
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-
-TaskPolicy< Kokkos::Serial >::member_type &
-TaskPolicy< Kokkos::Serial >::member_single()
-{
-  static member_type s(0,1,0); 
-  return s ;
-}
-
-} // namespace Experimental
-} // namespace Kokkos
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-typedef TaskMember<  Kokkos::Serial , void , void > Task ;
-
-//----------------------------------------------------------------------------
-
-namespace {
-
-inline
-unsigned padded_sizeof_derived( unsigned sizeof_derived )
-{
-  return sizeof_derived +
-    ( sizeof_derived % sizeof(Task*) ? sizeof(Task*) - sizeof_derived % sizeof(Task*) : 0 );
-}
-
-} // namespace
-
-void Task::deallocate( void * ptr )
-{
-  free( ptr );
-}
-
-void * Task::allocate( const unsigned arg_sizeof_derived
-                     , const unsigned arg_dependence_capacity )
-{
-  return malloc( padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) );
-}
-
-Task::~TaskMember()
-{
-
-}
-
-Task::TaskMember( const Task::function_verify_type   arg_verify
-                , const Task::function_dealloc_type  arg_dealloc
-                , const Task::function_apply_type    arg_apply
-                , const unsigned                     arg_sizeof_derived
-                , const unsigned                     arg_dependence_capacity
-                )
-  : m_dealloc( arg_dealloc )
-  , m_verify(  arg_verify )
-  , m_apply(   arg_apply )
-  , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) )
-  , m_wait( 0 )
-  , m_next( 0 )
-  , m_dep_capacity( arg_dependence_capacity )
-  , m_dep_size( 0 )
-  , m_ref_count( 0 )
-  , m_state( TASK_STATE_CONSTRUCTING )
-{
-  for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ;
-}
-
-Task::TaskMember( const Task::function_dealloc_type  arg_dealloc
-                , const Task::function_apply_type    arg_apply
-                , const unsigned                     arg_sizeof_derived
-                , const unsigned                     arg_dependence_capacity
-                )
-  : m_dealloc( arg_dealloc )
-  , m_verify(  & Task::verify_type<void> )
-  , m_apply(   arg_apply )
-  , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) )
-  , m_wait( 0 )
-  , m_next( 0 )
-  , m_dep_capacity( arg_dependence_capacity )
-  , m_dep_size( 0 )
-  , m_ref_count( 0 )
-  , m_state( TASK_STATE_CONSTRUCTING )
-{
-  for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ;
-}
-
-//----------------------------------------------------------------------------
-
-void Task::throw_error_add_dependence() const
-{
-  std::cerr << "TaskMember< Serial >::add_dependence ERROR"
-            << " state(" << m_state << ")"
-            << " dep_size(" << m_dep_size << ")"
-            << std::endl ;
-  throw std::runtime_error("TaskMember< Serial >::add_dependence ERROR");
-}
-
-void Task::throw_error_verify_type()
-{
-  throw std::runtime_error("TaskMember< Serial >::verify_type ERROR");
-}
-
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-
-void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw )
-{
-  static const char msg_error_header[]      = "Kokkos::Experimental::Impl::TaskManager<Kokkos::Serial>::assign ERROR" ;
-  static const char msg_error_count[]       = ": negative reference count" ;
-  static const char msg_error_complete[]    = ": destroy task that is not complete" ;
-  static const char msg_error_dependences[] = ": destroy task that has dependences" ;
-  static const char msg_error_exception[]   = ": caught internal exception" ;
-
-  const char * msg_error = 0 ;
-
-  try {
-
-    if ( *lhs ) {
-
-      const int count = --((**lhs).m_ref_count);
-
-      if ( 0 == count ) {
-
-        // Reference count at zero, delete it
-
-        // Should only be deallocating a completed task
-        if ( (**lhs).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) {
-
-          // A completed task should not have dependences...
-          for ( int i = 0 ; i < (**lhs).m_dep_size && 0 == msg_error ; ++i ) {
-            if ( (**lhs).m_dep[i] ) msg_error = msg_error_dependences ;
-          }
-        }
-        else {
-          msg_error = msg_error_complete ;
-        }
-
-        if ( 0 == msg_error ) {
-          // Get deletion function and apply it
-          const Task::function_dealloc_type d = (**lhs).m_dealloc ;
-
-          (*d)( *lhs );
-        }
-      }
-      else if ( count <= 0 ) {
-        msg_error = msg_error_count ;
-      }
-    }
-
-    if ( 0 == msg_error && rhs ) { ++( rhs->m_ref_count ); }
-
-    *lhs = rhs ;
-  }
-  catch( ... ) {
-    if ( 0 == msg_error ) msg_error = msg_error_exception ;
-  }
-
-  if ( 0 != msg_error ) {
-    if ( no_throw ) {
-      std::cerr << msg_error_header << msg_error << std::endl ;
-      std::cerr.flush();
-    }
-    else {
-      std::string msg(msg_error_header);
-      msg.append(msg_error);
-      throw std::runtime_error( msg );
-    }
-  }
-}
-#endif
-
-namespace {
-
-Task * s_ready = 0 ;
-Task * s_denied = reinterpret_cast<Task*>( ~((uintptr_t)0) );
-
-}
-
-void Task::schedule()
-{
-  // Execute ready tasks in case the task being scheduled
-  // is dependent upon a waiting and ready task.
-
-  Task::execute_ready_tasks();
-
-  // spawning   : Constructing -> Waiting
-  // respawning : Executing    -> Waiting
-  // updating   : Waiting      -> Waiting
-
-  // Must not be in a dependence linked list:  0 == t->m_next
-
-  const bool ok_state = TASK_STATE_COMPLETE != m_state ;
-  const bool ok_list  = 0 == m_next ;
-
-  if ( ok_state && ok_list ) {
-
-    if ( TASK_STATE_CONSTRUCTING == m_state ) {
-      // Initial scheduling increment,
-      // matched by decrement when task is complete.
-      ++m_ref_count ;
-    }
-
-    // Will be waiting for execution upon return from this function
-
-    m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
-
-    // Insert this task into another dependence that is not complete
-
-    int i = 0 ;
-    for ( ; i < m_dep_size ; ++i ) {
-      Task * const y = m_dep[i] ;
-      if ( y && s_denied != ( m_next = y->m_wait ) ) {
-        y->m_wait = this ; // CAS( & y->m_wait , m_next , this );
-        break ;
-      }
-    }
-    if ( i == m_dep_size ) {
-      // All dependences are complete, insert into the ready list
-      m_next  = s_ready ;
-      s_ready = this ; // CAS( & s_ready , m_next = s_ready , this );
-    }
-  }
-  else {
-    throw std::runtime_error(std::string("Kokkos::Experimental::Impl::Task spawn or respawn state error"));
-  }
-}
-
-void Task::execute_ready_tasks()
-{
-  while ( s_ready ) {
-
-    // Remove this task from the ready list
-
-    // Task * task ;
-    // while ( ! CAS( & s_ready , task = s_ready , s_ready->m_next ) );
-
-    Task * task = s_ready ;
-
-    s_ready = task->m_next ;
-
-    task->m_next = 0 ;
-
-    // precondition: task->m_state = TASK_STATE_WAITING
-    // precondition: task->m_dep[i]->m_state == TASK_STATE_COMPLETE  for all i
-    // precondition: does not exist T such that T->m_wait = task
-    // precondition: does not exist T such that T->m_next = task
-
-    task->m_state = Kokkos::Experimental::TASK_STATE_EXECUTING ;
-
-    (*task->m_apply)( task );
-
-    if ( task->m_state == Kokkos::Experimental::TASK_STATE_EXECUTING ) {
-      // task did not respawn itself
-      task->m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ;
-
-      // release dependences:
-      for ( int i = 0 ; i < task->m_dep_size ; ++i ) {
-        assign( task->m_dep + i , 0 );
-      }
-
-      // Stop other tasks from adding themselves to 'task->m_wait' ;
-
-      Task * x ;
-      // CAS( & task->m_wait , x = task->m_wait , s_denied );
-      x = task->m_wait ; task->m_wait = s_denied ;
-
-      // update tasks waiting on this task
-      while ( x ) {
-        Task * const next = x->m_next ;
-
-        x->m_next = 0 ;
-
-        x->schedule(); // could happen concurrently
-
-        x = next ;
-      }
-
-      // Decrement to match the initial scheduling increment
-      assign( & task , 0 );
-    }
-  }
-}
-
-} // namespace Impl
-} // namespace Experimental
-} // namespace Kokkos
-
-#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
-
diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp
deleted file mode 100644
index a333f948ae18e3e3622d06551dd935aff0d77707..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp
+++ /dev/null
@@ -1,677 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-// Experimental unified task-data parallel manycore LDRD
-
-#ifndef KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP
-#define KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP
-
-#include <Kokkos_Macros.hpp>
-
-#if defined( KOKKOS_HAVE_SERIAL )
-
-#include <string>
-#include <typeinfo>
-#include <stdexcept>
-
-#include <Kokkos_Serial.hpp>
-#include <Kokkos_TaskPolicy.hpp>
-#include <Kokkos_View.hpp>
-
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
-
-#include <impl/Kokkos_FunctorAdapter.hpp>
-
-//----------------------------------------------------------------------------
-/*  Inheritance structure to allow static_cast from the task root type
- *  and a task's FunctorType.
- *
- *    task_root_type == TaskMember< Space , void , void >
- *
- *    TaskMember< PolicyType , ResultType , FunctorType >
- *      : TaskMember< PolicyType::Space , ResultType , FunctorType >
- *      { ... };
- *
- *    TaskMember< Space , ResultType , FunctorType >
- *      : TaskMember< Space , ResultType , void >
- *      , FunctorType
- *      { ... };
- *
- *  when ResultType != void
- *
- *    TaskMember< Space , ResultType , void >
- *      : TaskMember< Space , void , void >
- *      { ... };
- *
- */
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-/** \brief  Base class for all tasks in the Serial execution space */
-template<>
-class TaskMember< Kokkos::Serial , void , void >
-{
-public:
-
-  typedef void         (* function_apply_type)  ( TaskMember * );
-  typedef void         (* function_dealloc_type)( TaskMember * );
-  typedef TaskMember * (* function_verify_type) ( TaskMember * );
-
-private:
-
-  const function_dealloc_type  m_dealloc ; ///< Deallocation
-  const function_verify_type   m_verify ;  ///< Result type verification
-  const function_apply_type    m_apply ;   ///< Apply function
-  TaskMember ** const          m_dep ;     ///< Dependences
-  TaskMember *                 m_wait ;    ///< Linked list of tasks waiting on this task
-  TaskMember *                 m_next ;    ///< Linked list of tasks waiting on a different task
-  const int                    m_dep_capacity ; ///< Capacity of dependences
-  int                          m_dep_size ;     ///< Actual count of dependences
-  int                          m_ref_count ;    ///< Reference count
-  int                          m_state ;        ///< State of the task
-
-  // size = 6 Pointers + 4 ints
-
-  TaskMember() /* = delete */ ;
-  TaskMember( const TaskMember & ) /* = delete */ ;
-  TaskMember & operator = ( const TaskMember & ) /* = delete */ ;
-
-  static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity );
-  static void   deallocate( void * );
-
-  void throw_error_add_dependence() const ;
-  static void throw_error_verify_type();
-
-  template < class DerivedTaskType >
-  static
-  void deallocate( TaskMember * t )
-    {
-      DerivedTaskType * ptr = static_cast< DerivedTaskType * >(t);
-      ptr->~DerivedTaskType();
-      deallocate( (void *) ptr );
-    }
-
-protected :
-
-  ~TaskMember();
-
-  // Used by TaskMember< Serial , ResultType , void >
-  TaskMember( const function_verify_type   arg_verify
-            , const function_dealloc_type  arg_dealloc
-            , const function_apply_type    arg_apply
-            , const unsigned               arg_sizeof_derived
-            , const unsigned               arg_dependence_capacity
-            );
-
-  // Used for TaskMember< Serial , void , void >
-  TaskMember( const function_dealloc_type  arg_dealloc
-            , const function_apply_type    arg_apply
-            , const unsigned               arg_sizeof_derived
-            , const unsigned               arg_dependence_capacity
-            );
-
-public:
-
-  template< typename ResultType >
-  KOKKOS_FUNCTION static
-  TaskMember * verify_type( TaskMember * t )
-    {
-      enum { check_type = ! Kokkos::Impl::is_same< ResultType , void >::value };
-
-      if ( check_type && t != 0 ) {
-
-        // Verify that t->m_verify is this function
-        const function_verify_type self = & TaskMember::template verify_type< ResultType > ;
-
-        if ( t->m_verify != self ) {
-          t = 0 ;
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-          throw_error_verify_type();
-#endif
-        }
-      }
-      return t ;
-    }
-
-  //----------------------------------------
-  /*  Inheritence Requirements on task types:
-   *    typedef  FunctorType::value_type  value_type ;
-   *    class DerivedTaskType
-   *      : public TaskMember< Serial , value_type , FunctorType >
-   *      { ... };
-   *    class TaskMember< Serial , value_type , FunctorType >
-   *      : public TaskMember< Serial , value_type , void >
-   *      , public Functor
-   *      { ... };
-   *  If value_type != void
-   *    class TaskMember< Serial , value_type , void >
-   *      : public TaskMember< Serial , void , void >
-   *
-   *  Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ]
-   *
-   */
-
-  /** \brief  Allocate and construct a single-thread task */
-  template< class DerivedTaskType >
-  static
-  TaskMember * create( const typename DerivedTaskType::functor_type &  arg_functor
-                     , const unsigned                                  arg_dependence_capacity
-                     )
-    {
-      typedef typename DerivedTaskType::functor_type  functor_type ;
-      typedef typename functor_type::value_type       value_type ;
-
-      DerivedTaskType * const task =
-        new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
-          DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
-                         , & TaskMember::template apply_single< functor_type , value_type >
-                         , sizeof(DerivedTaskType)
-                         , arg_dependence_capacity
-                         , arg_functor );
-
-      return static_cast< TaskMember * >( task );
-    }
-
-  /** \brief  Allocate and construct a data parallel task */
-  template< class DerivedTaskType >
-  static
-  TaskMember * create( const typename DerivedTaskType::policy_type &   arg_policy
-                     , const typename DerivedTaskType::functor_type &  arg_functor
-                     , const unsigned                                  arg_dependence_capacity
-                     )
-    {
-      DerivedTaskType * const task =
-        new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
-          DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
-                         , sizeof(DerivedTaskType)
-                         , arg_dependence_capacity
-                         , arg_policy
-                         , arg_functor
-                         );
-
-      return static_cast< TaskMember * >( task );
-    }
-
-  /** \brief  Allocate and construct a thread-team task */
-  template< class DerivedTaskType >
-  static
-  TaskMember * create_team( const typename DerivedTaskType::functor_type &  arg_functor
-                          , const unsigned                                  arg_dependence_capacity
-                          )
-    {
-      typedef typename DerivedTaskType::functor_type  functor_type ;
-      typedef typename functor_type::value_type       value_type ;
-
-      DerivedTaskType * const task =
-        new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
-          DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
-                         , & TaskMember::template apply_team< functor_type , value_type >
-                         , sizeof(DerivedTaskType)
-                         , arg_dependence_capacity
-                         , arg_functor );
-
-      return static_cast< TaskMember * >( task );
-    }
-
-  void schedule();
-  static void execute_ready_tasks();
-
-  //----------------------------------------
-
-  typedef FutureValueTypeIsVoidError get_result_type ;
-
-  KOKKOS_INLINE_FUNCTION
-  get_result_type get() const { return get_result_type() ; }
-
-  KOKKOS_INLINE_FUNCTION
-  Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); }
-
-  //----------------------------------------
-
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-  static
-  void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false );
-#else
-  KOKKOS_INLINE_FUNCTION static
-  void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {}
-#endif
-
-  KOKKOS_INLINE_FUNCTION
-  TaskMember * get_dependence( int i ) const
-    { return ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state && 0 <= i && i < m_dep_size ) ? m_dep[i] : (TaskMember*) 0 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  int get_dependence() const
-    { return m_dep_size ; }
-
-  KOKKOS_INLINE_FUNCTION
-  void clear_dependence()
-    {
-      for ( int i = 0 ; i < m_dep_size ; ++i ) assign( m_dep + i , 0 );
-      m_dep_size = 0 ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  void add_dependence( TaskMember * before )
-    {
-      if ( ( Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state ||
-             Kokkos::Experimental::TASK_STATE_EXECUTING    == m_state ) &&
-           m_dep_size < m_dep_capacity ) {
-        assign( m_dep + m_dep_size , before );
-        ++m_dep_size ;
-      }
-      else {
-        throw_error_add_dependence();
-      }
-    }
-
-  //----------------------------------------
-
-  template< class FunctorType , class ResultType >
-  KOKKOS_INLINE_FUNCTION static
-  void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
-    {
-      typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
-
-      // TaskMember< Kokkos::Serial , ResultType , FunctorType >
-      //   : public TaskMember< Kokkos::Serial , ResultType , void >
-      //   , public FunctorType
-      //   { ... };
-
-      derived_type & m = * static_cast< derived_type * >( t );
-
-      Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & >::apply( (FunctorType &) m , & m.m_result );
-    }
-
-  template< class FunctorType , class ResultType >
-  KOKKOS_INLINE_FUNCTION static
-  void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
-    {
-      typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
-
-      // TaskMember< Kokkos::Serial , ResultType , FunctorType >
-      //   : public TaskMember< Kokkos::Serial , ResultType , void >
-      //   , public FunctorType
-      //   { ... };
-
-      derived_type & m = * static_cast< derived_type * >( t );
-
-      Kokkos::Impl::FunctorApply< FunctorType , void , void >::apply( (FunctorType &) m );
-    }
-
-  //----------------------------------------
-
-  template< class FunctorType , class ResultType >
-  static
-  void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
-    {
-      typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
-      typedef Kokkos::Impl::SerialTeamMember                          member_type ;
-
-      // TaskMember< Kokkos::Serial , ResultType , FunctorType >
-      //   : public TaskMember< Kokkos::Serial , ResultType , void >
-      //   , public FunctorType
-      //   { ... };
-
-      derived_type & m = * static_cast< derived_type * >( t );
-
-      m.FunctorType::apply( member_type(0,1,0) , m.m_result );
-    }
-
-  template< class FunctorType , class ResultType >
-  static
-  void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
-    {
-      typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
-      typedef Kokkos::Impl::SerialTeamMember                          member_type ;
-
-      // TaskMember< Kokkos::Serial , ResultType , FunctorType >
-      //   : public TaskMember< Kokkos::Serial , ResultType , void >
-      //   , public FunctorType
-      //   { ... };
-
-      derived_type & m = * static_cast< derived_type * >( t );
-
-      m.FunctorType::apply( member_type(0,1,0) );
-    }
-};
-
-//----------------------------------------------------------------------------
-/** \brief  Base class for tasks with a result value in the Serial execution space.
- *
- *  The FunctorType must be void because this class is accessed by the
- *  Future class for the task and result value.
- *
- *  Must be derived from TaskMember<S,void,void> 'root class' so the Future class
- *  can correctly static_cast from the 'root class' to this class.
- */
-template < class ResultType >
-class TaskMember< Kokkos::Serial , ResultType , void >
-  : public TaskMember< Kokkos::Serial , void , void >
-{
-public:
-
-  ResultType  m_result ;
-
-  typedef const ResultType & get_result_type ;
-
-  KOKKOS_INLINE_FUNCTION
-  get_result_type get() const { return m_result ; }
-
-protected:
-
-  typedef TaskMember< Kokkos::Serial , void , void >  task_root_type ;
-  typedef task_root_type::function_dealloc_type       function_dealloc_type ;
-  typedef task_root_type::function_apply_type         function_apply_type ;
-
-  inline
-  TaskMember( const function_dealloc_type  arg_dealloc
-            , const function_apply_type    arg_apply
-            , const unsigned               arg_sizeof_derived
-            , const unsigned               arg_dependence_capacity
-            )
-    : task_root_type( & task_root_type::template verify_type< ResultType >
-                    , arg_dealloc
-                    , arg_apply
-                    , arg_sizeof_derived
-                    , arg_dependence_capacity )
-    , m_result()
-    {}
-};
-
-template< class ResultType , class FunctorType >
-class TaskMember< Kokkos::Serial , ResultType , FunctorType >
-  : public TaskMember< Kokkos::Serial , ResultType , void >
-  , public FunctorType
-{
-public:
-
-  typedef FunctorType  functor_type ;
-
-  typedef TaskMember< Kokkos::Serial , void , void >        task_root_type ;
-  typedef TaskMember< Kokkos::Serial , ResultType , void >  task_base_type ;
-  typedef task_root_type::function_dealloc_type             function_dealloc_type ;
-  typedef task_root_type::function_apply_type               function_apply_type ;
-
-  inline
-  TaskMember( const function_dealloc_type  arg_dealloc
-            , const function_apply_type    arg_apply
-            , const unsigned               arg_sizeof_derived
-            , const unsigned               arg_dependence_capacity
-            , const functor_type &         arg_functor
-            )
-    : task_base_type( arg_dealloc , arg_apply , arg_sizeof_derived , arg_dependence_capacity )
-    , functor_type( arg_functor )
-    {}
-};
-
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-
-template<>
-class TaskPolicy< Kokkos::Serial >
-{
-public:
-
-  typedef Kokkos::Serial                  execution_space ;
-  typedef Kokkos::Impl::SerialTeamMember  member_type ;
-
-private:
-
-  typedef Impl::TaskMember< execution_space , void , void > task_root_type ;
-
-  template< class FunctorType >
-  static inline
-  const task_root_type * get_task_root( const FunctorType * f )
-    {
-      typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
-      return static_cast< const task_root_type * >( static_cast< const task_type * >(f) );
-    }
-
-  template< class FunctorType >
-  static inline
-  task_root_type * get_task_root( FunctorType * f )
-    {
-      typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
-      return static_cast< task_root_type * >( static_cast< task_type * >(f) );
-    }
-
-  unsigned m_default_dependence_capacity ;
-
-public:
-
-  // Stubbed out for now.
-  KOKKOS_INLINE_FUNCTION
-  int allocated_task_count() const { return 0 ; }
-
-  TaskPolicy
-    ( const unsigned /* arg_task_max_count */
-    , const unsigned /* arg_task_max_size */
-    , const unsigned arg_task_default_dependence_capacity = 4
-    , const unsigned /* arg_task_team_size */ = 0
-    )
-    : m_default_dependence_capacity( arg_task_default_dependence_capacity )
-    {}
-
-  KOKKOS_FUNCTION TaskPolicy() = default ;
-  KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ;
-  KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ;
-  KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
-  KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ;
-
-  //----------------------------------------
-
-  template< class ValueType >
-  KOKKOS_INLINE_FUNCTION
-  const Future< ValueType , execution_space > &
-    spawn( const Future< ValueType , execution_space > & f 
-         , const bool priority = false ) const
-      {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-        f.m_task->schedule();
-#endif
-        return f ;
-      }
-
-  //----------------------------------------
-  // Create single-thread task
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< typename FunctorType::value_type , execution_space >
-  task_create( const FunctorType & functor
-             , const unsigned dependence_capacity = ~0u ) const
-    {
-      typedef typename FunctorType::value_type value_type ;
-      typedef Impl::TaskMember< execution_space , value_type , FunctorType >  task_type ;
-      return Future< value_type , execution_space >(
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-        task_root_type::create< task_type >(
-          functor , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) )
-#endif
-        );
-    }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< typename FunctorType::value_type , execution_space >
-  proc_create( const FunctorType & functor
-             , const unsigned dependence_capacity = ~0u ) const
-    { return task_create( functor , dependence_capacity ); }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< typename FunctorType::value_type , execution_space >
-  task_create_team( const FunctorType & functor
-                  , const unsigned dependence_capacity = ~0u ) const
-    {
-      typedef typename FunctorType::value_type value_type ;
-      typedef Impl::TaskMember< execution_space , value_type , FunctorType >  task_type ;
-      return Future< value_type , execution_space >(
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-        task_root_type::create_team< task_type >(
-          functor , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) )
-#endif
-        );
-    }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< typename FunctorType::value_type , execution_space >
-  proc_create_team( const FunctorType & functor
-                  , const unsigned dependence_capacity = ~0u ) const
-    { return task_create_team( functor , dependence_capacity ); }
-
-  //----------------------------------------
-  // Add dependence
-  template< class A1 , class A2 , class A3 , class A4 >
-  KOKKOS_INLINE_FUNCTION
-  void add_dependence( const Future<A1,A2> & after
-                     , const Future<A3,A4> & before
-                     , typename Kokkos::Impl::enable_if
-                        < Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
-                          &&
-                          Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
-                        >::type * = 0
-                      ) const
-    {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-      after.m_task->add_dependence( before.m_task );
-#endif
-    }
-
-  //----------------------------------------
-  // Functions for an executing task functor to query dependences,
-  // set new dependences, and respawn itself.
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  Future< void , execution_space >
-  get_dependence( const FunctorType * task_functor , int i ) const
-    {
-      return Future<void,execution_space>(
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-        get_task_root(task_functor)->get_dependence(i)
-#endif
-        );
-    }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  int get_dependence( const FunctorType * task_functor ) const
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-    { return get_task_root(task_functor)->get_dependence(); }
-#else
-    { return 0 ; }
-#endif
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  void clear_dependence( FunctorType * task_functor ) const
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-    { get_task_root(task_functor)->clear_dependence(); }
-#else
-    {}
-#endif
-
-  template< class FunctorType , class A3 , class A4 >
-  KOKKOS_INLINE_FUNCTION
-  void add_dependence( FunctorType * task_functor
-                     , const Future<A3,A4> & before
-                     , typename Kokkos::Impl::enable_if
-                        < Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
-                        >::type * = 0
-                      ) const
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-    { get_task_root(task_functor)->add_dependence( before.m_task ); }
-#else
-    {}
-#endif
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  void respawn( FunctorType * task_functor 
-              , const bool priority = false ) const
-    {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-      get_task_root(task_functor)->schedule();
-#endif
-    }
-
-  template< class FunctorType >
-  KOKKOS_INLINE_FUNCTION
-  void respawn_needing_memory( FunctorType * task_functor ) const
-    {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-      get_task_root(task_functor)->schedule();
-#endif
-    }
-
-  //----------------------------------------
-
-  static member_type & member_single();
-};
-
-inline
-void wait( TaskPolicy< Kokkos::Serial > & )
-{ Impl::TaskMember< Kokkos::Serial , void , void >::execute_ready_tasks(); }
-
-} /* namespace Experimental */
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
-#endif /* defined( KOKKOS_HAVE_SERIAL ) */
-#endif /* #define KOKKOS_EXPERIMENTAL_SERIAL_TASK_HPP */
-
diff --git a/lib/kokkos/core/src/impl/Kokkos_Shape.cpp b/lib/kokkos/core/src/impl/Kokkos_Shape.cpp
deleted file mode 100644
index da12db1f381e790e46604f8a15280d2a07f5152a..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/impl/Kokkos_Shape.cpp
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-
-#include <sstream>
-#include <impl/Kokkos_Error.hpp>
-#include <impl/Kokkos_Shape.hpp>
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-void assert_counts_are_equal_throw(
-  const size_t x_count ,
-  const size_t y_count )
-{
-  std::ostringstream msg ;
-
-  msg << "Kokkos::Impl::assert_counts_are_equal_throw( "
-      << x_count << " != " << y_count << " )" ;
-
-  throw_runtime_exception( msg.str() );
-}
-
-void assert_shapes_are_equal_throw(
-  const unsigned x_scalar_size ,
-  const unsigned x_rank ,
-  const size_t   x_N0 , const unsigned x_N1 ,
-  const unsigned x_N2 , const unsigned x_N3 ,
-  const unsigned x_N4 , const unsigned x_N5 ,
-  const unsigned x_N6 , const unsigned x_N7 ,
-
-  const unsigned y_scalar_size ,
-  const unsigned y_rank ,
-  const size_t   y_N0 , const unsigned y_N1 ,
-  const unsigned y_N2 , const unsigned y_N3 ,
-  const unsigned y_N4 , const unsigned y_N5 ,
-  const unsigned y_N6 , const unsigned y_N7 )
-{
-  std::ostringstream msg ;
-
-  msg << "Kokkos::Impl::assert_shape_are_equal_throw( {"
-      << " scalar_size(" << x_scalar_size
-      << ") rank(" << x_rank
-      << ") dimension(" ;
-  if ( 0 < x_rank ) { msg << " " << x_N0 ; }
-  if ( 1 < x_rank ) { msg << " " << x_N1 ; }
-  if ( 2 < x_rank ) { msg << " " << x_N2 ; }
-  if ( 3 < x_rank ) { msg << " " << x_N3 ; }
-  if ( 4 < x_rank ) { msg << " " << x_N4 ; }
-  if ( 5 < x_rank ) { msg << " " << x_N5 ; }
-  if ( 6 < x_rank ) { msg << " " << x_N6 ; }
-  if ( 7 < x_rank ) { msg << " " << x_N7 ; }
-  msg << " ) } != { "
-      << " scalar_size(" << y_scalar_size
-      << ") rank(" << y_rank
-      << ") dimension(" ;
-  if ( 0 < y_rank ) { msg << " " << y_N0 ; }
-  if ( 1 < y_rank ) { msg << " " << y_N1 ; }
-  if ( 2 < y_rank ) { msg << " " << y_N2 ; }
-  if ( 3 < y_rank ) { msg << " " << y_N3 ; }
-  if ( 4 < y_rank ) { msg << " " << y_N4 ; }
-  if ( 5 < y_rank ) { msg << " " << y_N5 ; }
-  if ( 6 < y_rank ) { msg << " " << y_N6 ; }
-  if ( 7 < y_rank ) { msg << " " << y_N7 ; }
-  msg << " ) } )" ;
-
-  throw_runtime_exception( msg.str() );
-}
-
-void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply(
-  const size_t rank ,
-  const size_t n0 , const size_t n1 , 
-  const size_t n2 , const size_t n3 ,
-  const size_t n4 , const size_t n5 ,
-  const size_t n6 , const size_t n7 ,
-
-  const size_t arg_rank ,
-  const size_t i0 , const size_t i1 ,
-  const size_t i2 , const size_t i3 ,
-  const size_t i4 , const size_t i5 ,
-  const size_t i6 , const size_t i7 )
-{
-  std::ostringstream msg ;
-  msg << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ;
-  if ( 0 < rank ) { msg << " " << n0 ; }
-  if ( 1 < rank ) { msg << " " << n1 ; }
-  if ( 2 < rank ) { msg << " " << n2 ; }
-  if ( 3 < rank ) { msg << " " << n3 ; }
-  if ( 4 < rank ) { msg << " " << n4 ; }
-  if ( 5 < rank ) { msg << " " << n5 ; }
-  if ( 6 < rank ) { msg << " " << n6 ; }
-  if ( 7 < rank ) { msg << " " << n7 ; }
-  msg << " } index = {" ;
-  if ( 0 < arg_rank ) { msg << " " << i0 ; }
-  if ( 1 < arg_rank ) { msg << " " << i1 ; }
-  if ( 2 < arg_rank ) { msg << " " << i2 ; }
-  if ( 3 < arg_rank ) { msg << " " << i3 ; }
-  if ( 4 < arg_rank ) { msg << " " << i4 ; }
-  if ( 5 < arg_rank ) { msg << " " << i5 ; }
-  if ( 6 < arg_rank ) { msg << " " << i6 ; }
-  if ( 7 < arg_rank ) { msg << " " << i7 ; }
-  msg << " } )" ;
-
-  throw_runtime_exception( msg.str() );
-}
-
-void assert_shape_effective_rank1_at_leastN_throw(
-  const size_t x_rank , const size_t x_N0 ,
-  const size_t x_N1 ,   const size_t x_N2 ,
-  const size_t x_N3 ,   const size_t x_N4 ,
-  const size_t x_N5 ,   const size_t x_N6 ,
-  const size_t x_N7 ,
-  const size_t N0 )
-{
-  std::ostringstream msg ;
-
-  msg << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ;
-  if ( 0 < x_rank ) { msg << " " << x_N0 ; }
-  if ( 1 < x_rank ) { msg << " " << x_N1 ; }
-  if ( 2 < x_rank ) { msg << " " << x_N2 ; }
-  if ( 3 < x_rank ) { msg << " " << x_N3 ; }
-  if ( 4 < x_rank ) { msg << " " << x_N4 ; }
-  if ( 5 < x_rank ) { msg << " " << x_N5 ; }
-  if ( 6 < x_rank ) { msg << " " << x_N6 ; }
-  if ( 7 < x_rank ) { msg << " " << x_N7 ; }
-  msg << " } N = " << N0 << " )" ;
-
-  throw_runtime_exception( msg.str() );
-}
-
-
-
-}
-}
-
diff --git a/lib/kokkos/core/src/impl/Kokkos_Shape.hpp b/lib/kokkos/core/src/impl/Kokkos_Shape.hpp
deleted file mode 100644
index 9749e0a1ff73107b97435862f737d96439fcb9d3..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/impl/Kokkos_Shape.hpp
+++ /dev/null
@@ -1,917 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_SHAPE_HPP
-#define KOKKOS_SHAPE_HPP
-
-#include <typeinfo>
-#include <utility>
-#include <Kokkos_Core_fwd.hpp>
-#include <impl/Kokkos_Traits.hpp>
-#include <impl/Kokkos_StaticAssert.hpp>
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-//----------------------------------------------------------------------------
-/** \brief  The shape of a Kokkos with dynamic and static dimensions.
- *          Dynamic dimensions are member values and static dimensions are
- *          'static const' values.
- *
- *  The upper bound on the array rank is eight.
- */
-template< unsigned ScalarSize ,
-          unsigned Rank ,
-          unsigned s0  = 1 ,
-          unsigned s1  = 1 ,
-          unsigned s2  = 1 ,
-          unsigned s3  = 1 ,
-          unsigned s4  = 1 ,
-          unsigned s5  = 1 ,
-          unsigned s6  = 1 ,
-          unsigned s7  = 1 >
-struct Shape ;
-
-//----------------------------------------------------------------------------
-/** \brief  Shape equality if the value type, layout, and dimensions
- *          are equal.
- */
-template< unsigned xSize , unsigned xRank ,
-          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
-          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
-
-          unsigned ySize , unsigned yRank ,
-          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
-          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
-KOKKOS_INLINE_FUNCTION
-bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
-                   const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
-{
-  enum { same_size = xSize == ySize };
-  enum { same_rank = xRank == yRank };
-
-  return same_size && same_rank &&
-         size_t( x.N0 )   == size_t( y.N0 ) &&
-         unsigned( x.N1 ) == unsigned( y.N1 ) &&
-         unsigned( x.N2 ) == unsigned( y.N2 ) &&
-         unsigned( x.N3 ) == unsigned( y.N3 ) &&
-         unsigned( x.N4 ) == unsigned( y.N4 ) &&
-         unsigned( x.N5 ) == unsigned( y.N5 ) &&
-         unsigned( x.N6 ) == unsigned( y.N6 ) &&
-         unsigned( x.N7 ) == unsigned( y.N7 ) ;
-}
-
-template< unsigned xSize , unsigned xRank ,
-          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
-          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
-
-          unsigned ySize ,unsigned yRank ,
-          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
-          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
-KOKKOS_INLINE_FUNCTION
-bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
-                   const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
-{ return ! operator == ( x , y ); }
-
-//----------------------------------------------------------------------------
-
-void assert_counts_are_equal_throw(
-  const size_t x_count ,
-  const size_t y_count );
-
-inline
-void assert_counts_are_equal(
-  const size_t x_count ,
-  const size_t y_count )
-{
-  if ( x_count != y_count ) {
-    assert_counts_are_equal_throw( x_count , y_count );
-  }
-}
-
-void assert_shapes_are_equal_throw(
-  const unsigned x_scalar_size ,
-  const unsigned x_rank ,
-  const size_t   x_N0 , const unsigned x_N1 ,
-  const unsigned x_N2 , const unsigned x_N3 ,
-  const unsigned x_N4 , const unsigned x_N5 ,
-  const unsigned x_N6 , const unsigned x_N7 ,
-
-  const unsigned y_scalar_size ,
-  const unsigned y_rank ,
-  const size_t   y_N0 , const unsigned y_N1 ,
-  const unsigned y_N2 , const unsigned y_N3 ,
-  const unsigned y_N4 , const unsigned y_N5 ,
-  const unsigned y_N6 , const unsigned y_N7 );
-
-template< unsigned xSize , unsigned xRank ,
-          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
-          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
-
-          unsigned ySize , unsigned yRank ,
-          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
-          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
-inline
-void assert_shapes_are_equal(
-  const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
-  const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
-{
-  typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
-  typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
-
-  if ( x != y ) {
-    assert_shapes_are_equal_throw(
-      x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
-      y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
-  }
-}
-
-template< unsigned xSize , unsigned xRank ,
-          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
-          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
-
-          unsigned ySize , unsigned yRank ,
-          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
-          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
-void assert_shapes_equal_dimension(
-  const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
-  const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
-{
-  typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
-  typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
-
-  // Omit comparison of scalar_size.
-  if ( unsigned( x.rank ) != unsigned( y.rank ) ||
-       size_t( x.N0 )   != size_t( y.N0 ) || 
-       unsigned( x.N1 ) != unsigned( y.N1 ) || 
-       unsigned( x.N2 ) != unsigned( y.N2 ) || 
-       unsigned( x.N3 ) != unsigned( y.N3 ) ||
-       unsigned( x.N4 ) != unsigned( y.N4 ) || 
-       unsigned( x.N5 ) != unsigned( y.N5 ) || 
-       unsigned( x.N6 ) != unsigned( y.N6 ) || 
-       unsigned( x.N7 ) != unsigned( y.N7 ) ) {
-    assert_shapes_are_equal_throw(
-      x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
-      y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
-  }
-}
-
-//----------------------------------------------------------------------------
-
-template< class ShapeType > struct assert_shape_is_rank_zero ;
-template< class ShapeType > struct assert_shape_is_rank_one ;
-
-template< unsigned Size >
-struct assert_shape_is_rank_zero< Shape<Size,0> >
-  : public true_type {};
-
-template< unsigned Size , unsigned s0 >
-struct assert_shape_is_rank_one< Shape<Size,1,s0> >
-  : public true_type {};
-
-//----------------------------------------------------------------------------
-
-/** \brief  Array bounds assertion templated on the execution space
- *          to allow device-specific abort code.
- */
-template< class Space >
-struct AssertShapeBoundsAbort ;
-
-template<>
-struct AssertShapeBoundsAbort< Kokkos::HostSpace >
-{
-  static void apply( const size_t rank ,
-                     const size_t n0 , const size_t n1 ,
-                     const size_t n2 , const size_t n3 ,
-                     const size_t n4 , const size_t n5 ,
-                     const size_t n6 , const size_t n7 ,
-                     const size_t arg_rank ,
-                     const size_t i0 , const size_t i1 ,
-                     const size_t i2 , const size_t i3 ,
-                     const size_t i4 , const size_t i5 ,
-                     const size_t i6 , const size_t i7 );
-};
-
-template< class ExecutionSpace >
-struct AssertShapeBoundsAbort
-{
-  KOKKOS_INLINE_FUNCTION
-  static void apply( const size_t rank ,
-                     const size_t n0 , const size_t n1 ,
-                     const size_t n2 , const size_t n3 ,
-                     const size_t n4 , const size_t n5 ,
-                     const size_t n6 , const size_t n7 ,
-                     const size_t arg_rank ,
-                     const size_t i0 , const size_t i1 ,
-                     const size_t i2 , const size_t i3 ,
-                     const size_t i4 , const size_t i5 ,
-                     const size_t i6 , const size_t i7 )
-    {
-      AssertShapeBoundsAbort< Kokkos::HostSpace >
-        ::apply( rank ,    n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 ,
-                 arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
-    }
-};
-
-template< class ShapeType >
-KOKKOS_INLINE_FUNCTION
-void assert_shape_bounds( const ShapeType & shape ,
-                          const size_t arg_rank ,
-                          const size_t i0 ,
-                          const size_t i1 = 0 ,
-                          const size_t i2 = 0 ,
-                          const size_t i3 = 0 ,
-                          const size_t i4 = 0 ,
-                          const size_t i5 = 0 ,
-                          const size_t i6 = 0 ,
-                          const size_t i7 = 0 )
-{
-  // Must supply at least as many indices as ranks.
-  // Every index must be within bounds.
-  const bool ok = ShapeType::rank <= arg_rank &&
-                  i0 < size_t(shape.N0) && 
-                  i1 < size_t(shape.N1) &&
-                  i2 < size_t(shape.N2) &&
-                  i3 < size_t(shape.N3) &&
-                  i4 < size_t(shape.N4) &&
-                  i5 < size_t(shape.N5) &&
-                  i6 < size_t(shape.N6) &&
-                  i7 < size_t(shape.N7) ;
-
-  if ( ! ok ) {
-    AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace >
-      ::apply( ShapeType::rank ,
-               shape.N0 , shape.N1 , shape.N2 , shape.N3 ,
-               shape.N4 , shape.N5 , shape.N6 , shape.N7 ,
-               arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
-  }
-}
-
-#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0);
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1);
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2);
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3);
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4);
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5);
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6);
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7);
-#else
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */
-#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */
-#endif
-
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-// Specialization and optimization for the Rank 0 shape.
-
-template < unsigned ScalarSize >
-struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 >
-{
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 0 };
-  enum { rank         = 0 };
-
-  enum { N0 = 1 };
-  enum { N1 = 1 };
-  enum { N2 = 1 };
-  enum { N3 = 1 };
-  enum { N4 = 1 };
-  enum { N5 = 1 };
-  enum { N6 = 1 };
-  enum { N7 = 1 };
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & ,
-               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
-               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
-  {}
-};
-
-//----------------------------------------------------------------------------
-
-template< unsigned R > struct assign_shape_dimension ;
-
-#define KOKKOS_ASSIGN_SHAPE_DIMENSION( R ) \
-template<> \
-struct assign_shape_dimension< R > \
-{ \
-  template< class ShapeType > \
-  KOKKOS_INLINE_FUNCTION \
-  assign_shape_dimension( ShapeType & shape \
-                        , typename Impl::enable_if<( R < ShapeType::rank_dynamic ), size_t >::type n \
-                        ) { shape.N ## R = n ; } \
-};
-
-KOKKOS_ASSIGN_SHAPE_DIMENSION(0)
-KOKKOS_ASSIGN_SHAPE_DIMENSION(1)
-KOKKOS_ASSIGN_SHAPE_DIMENSION(2)
-KOKKOS_ASSIGN_SHAPE_DIMENSION(3)
-KOKKOS_ASSIGN_SHAPE_DIMENSION(4)
-KOKKOS_ASSIGN_SHAPE_DIMENSION(5)
-KOKKOS_ASSIGN_SHAPE_DIMENSION(6)
-KOKKOS_ASSIGN_SHAPE_DIMENSION(7)
-
-#undef KOKKOS_ASSIGN_SHAPE_DIMENSION
-
-//----------------------------------------------------------------------------
-// All-static dimension array
-
-template < unsigned ScalarSize ,
-           unsigned Rank ,
-           unsigned s0 ,
-           unsigned s1 ,
-           unsigned s2 ,
-           unsigned s3 ,
-           unsigned s4 ,
-           unsigned s5 ,
-           unsigned s6 ,
-           unsigned s7 >
-struct Shape {
-
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 0 };
-  enum { rank         = Rank };
-
-  enum { N0 = s0 };
-  enum { N1 = s1 };
-  enum { N2 = s2 };
-  enum { N3 = s3 };
-  enum { N4 = s4 };
-  enum { N5 = s5 };
-  enum { N6 = s6 };
-  enum { N7 = s7 };
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & ,
-               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
-               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
-  {}
-};
-
-// 1 == dynamic_rank <= rank <= 8
-template < unsigned ScalarSize ,
-           unsigned Rank ,
-           unsigned s1 ,
-           unsigned s2 ,
-           unsigned s3 ,
-           unsigned s4 ,
-           unsigned s5 ,
-           unsigned s6 ,
-           unsigned s7 >
-struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 >
-{
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 1 };
-  enum { rank         = Rank };
-
-  size_t N0 ; // For 1 == dynamic_rank allow  N0 > 2^32
-
-  enum { N1 = s1 };
-  enum { N2 = s2 };
-  enum { N3 = s3 };
-  enum { N4 = s4 };
-  enum { N5 = s5 };
-  enum { N6 = s6 };
-  enum { N7 = s7 };
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & s ,
-               size_t n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
-               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
-  { s.N0 = n0 ; }
-};
-
-// 2 == dynamic_rank <= rank <= 8
-template < unsigned ScalarSize , unsigned Rank ,
-           unsigned s2 ,
-           unsigned s3 ,
-           unsigned s4 ,
-           unsigned s5 ,
-           unsigned s6 ,
-           unsigned s7 >
-struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 >
-{
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 2 };
-  enum { rank         = Rank };
-
-  unsigned N0 ;
-  unsigned N1 ;
-
-  enum { N2 = s2 };
-  enum { N3 = s3 };
-  enum { N4 = s4 };
-  enum { N5 = s5 };
-  enum { N6 = s6 };
-  enum { N7 = s7 };
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & s ,
-               unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 ,
-               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
-  { s.N0 = n0 ; s.N1 = n1 ; }
-};
-
-// 3 == dynamic_rank <= rank <= 8
-template < unsigned Rank , unsigned ScalarSize ,
-           unsigned s3 ,
-           unsigned s4 ,
-           unsigned s5 ,
-           unsigned s6 ,
-           unsigned s7 >
-struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7>
-{
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 3 };
-  enum { rank         = Rank };
-
-  unsigned N0 ;
-  unsigned N1 ;
-  unsigned N2 ;
-
-  enum { N3 = s3 };
-  enum { N4 = s4 };
-  enum { N5 = s5 };
-  enum { N6 = s6 };
-  enum { N7 = s7 };
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & s ,
-               unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 ,
-               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
-  { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; }
-};
-
-// 4 == dynamic_rank <= rank <= 8
-template < unsigned ScalarSize , unsigned Rank ,
-           unsigned s4 ,
-           unsigned s5 ,
-           unsigned s6 ,
-           unsigned s7 >
-struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 >
-{
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 4 };
-  enum { rank         = Rank };
-
-  unsigned N0 ;
-  unsigned N1 ;
-  unsigned N2 ;
-  unsigned N3 ;
-
-  enum { N4 = s4 };
-  enum { N5 = s5 };
-  enum { N6 = s6 };
-  enum { N7 = s7 };
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & s ,
-               unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
-               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
-  { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; }
-};
-
-// 5 == dynamic_rank <= rank <= 8
-template < unsigned ScalarSize , unsigned Rank ,
-           unsigned s5 ,
-           unsigned s6 ,
-           unsigned s7 >
-struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 >
-{
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 5 };
-  enum { rank         = Rank };
-
-  unsigned N0 ;
-  unsigned N1 ;
-  unsigned N2 ;
-  unsigned N3 ;
-  unsigned N4 ;
-
-  enum { N5 = s5 };
-  enum { N6 = s6 };
-  enum { N7 = s7 };
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & s ,
-               unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
-               unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
-  { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; }
-};
-
-// 6 == dynamic_rank <= rank <= 8
-template < unsigned ScalarSize , unsigned Rank ,
-           unsigned s6 ,
-           unsigned s7 >
-struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 >
-{
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 6 };
-  enum { rank         = Rank };
-
-  unsigned N0 ;
-  unsigned N1 ;
-  unsigned N2 ;
-  unsigned N3 ;
-  unsigned N4 ;
-  unsigned N5 ;
-
-  enum { N6 = s6 };
-  enum { N7 = s7 };
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & s ,
-               unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
-               unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 )
-  {
-    s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
-    s.N4 = n4 ; s.N5 = n5 ;
-  }
-};
-
-// 7 == dynamic_rank <= rank <= 8
-template < unsigned ScalarSize , unsigned Rank ,
-           unsigned s7 >
-struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 >
-{
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 7 };
-  enum { rank         = Rank };
-
-  unsigned N0 ;
-  unsigned N1 ;
-  unsigned N2 ;
-  unsigned N3 ;
-  unsigned N4 ;
-  unsigned N5 ;
-  unsigned N6 ;
-
-  enum { N7 = s7 };
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & s ,
-               unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
-               unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 )
-  {
-    s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
-    s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ;
-  }
-};
-
-// 8 == dynamic_rank <= rank <= 8
-template < unsigned ScalarSize >
-struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 >
-{
-  enum { scalar_size   = ScalarSize };
-  enum { rank_dynamic = 8 };
-  enum { rank         = 8 };
-
-  unsigned N0 ;
-  unsigned N1 ;
-  unsigned N2 ;
-  unsigned N3 ;
-  unsigned N4 ;
-  unsigned N5 ;
-  unsigned N6 ;
-  unsigned N7 ;
-
-  KOKKOS_INLINE_FUNCTION
-  static
-  void assign( Shape & s ,
-               unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
-               unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 )
-  {
-    s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
-    s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ;
-  }
-};
-
-//----------------------------------------------------------------------------
-
-template< class ShapeType , unsigned N ,
-          unsigned R = ShapeType::rank_dynamic >
-struct ShapeInsert ;
-
-template< class ShapeType , unsigned N >
-struct ShapeInsert< ShapeType , N , 0 >
-{
-  typedef Shape< ShapeType::scalar_size ,
-                 ShapeType::rank + 1 ,
-                 N ,
-                 ShapeType::N0 ,
-                 ShapeType::N1 ,
-                 ShapeType::N2 ,
-                 ShapeType::N3 ,
-                 ShapeType::N4 ,
-                 ShapeType::N5 ,
-                 ShapeType::N6 > type ;
-};
-
-template< class ShapeType , unsigned N >
-struct ShapeInsert< ShapeType , N , 1 >
-{
-  typedef Shape< ShapeType::scalar_size ,
-                 ShapeType::rank + 1 ,
-                 0 ,
-                 N ,
-                 ShapeType::N1 ,
-                 ShapeType::N2 ,
-                 ShapeType::N3 ,
-                 ShapeType::N4 ,
-                 ShapeType::N5 ,
-                 ShapeType::N6 > type ;
-};
-
-template< class ShapeType , unsigned N >
-struct ShapeInsert< ShapeType , N , 2 >
-{
-  typedef Shape< ShapeType::scalar_size ,
-                 ShapeType::rank + 1 ,
-                 0 ,
-                 0 ,
-                 N ,
-                 ShapeType::N2 ,
-                 ShapeType::N3 ,
-                 ShapeType::N4 ,
-                 ShapeType::N5 ,
-                 ShapeType::N6 > type ;
-};
-
-template< class ShapeType , unsigned N >
-struct ShapeInsert< ShapeType , N , 3 >
-{
-  typedef Shape< ShapeType::scalar_size ,
-                 ShapeType::rank + 1 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 N ,
-                 ShapeType::N3 ,
-                 ShapeType::N4 ,
-                 ShapeType::N5 ,
-                 ShapeType::N6 > type ;
-};
-
-template< class ShapeType , unsigned N >
-struct ShapeInsert< ShapeType , N , 4 >
-{
-  typedef Shape< ShapeType::scalar_size ,
-                 ShapeType::rank + 1 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 N ,
-                 ShapeType::N4 ,
-                 ShapeType::N5 ,
-                 ShapeType::N6 > type ;
-};
-
-template< class ShapeType , unsigned N >
-struct ShapeInsert< ShapeType , N , 5 >
-{
-  typedef Shape< ShapeType::scalar_size ,
-                 ShapeType::rank + 1 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 N ,
-                 ShapeType::N5 ,
-                 ShapeType::N6 > type ;
-};
-
-template< class ShapeType , unsigned N >
-struct ShapeInsert< ShapeType , N , 6 >
-{
-  typedef Shape< ShapeType::scalar_size ,
-                 ShapeType::rank + 1 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 N ,
-                 ShapeType::N6 > type ;
-};
-
-template< class ShapeType , unsigned N >
-struct ShapeInsert< ShapeType , N , 7 >
-{
-  typedef Shape< ShapeType::scalar_size ,
-                 ShapeType::rank + 1 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 0 ,
-                 N > type ;
-};
-
-//----------------------------------------------------------------------------
-
-template< class DstShape , class SrcShape ,
-          unsigned DstRankDynamic   = DstShape::rank_dynamic ,
-          bool     DstRankDynamicOK = unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) >
-struct ShapeCompatible { enum { value = false }; };
-
-template< class DstShape , class SrcShape >
-struct ShapeCompatible< DstShape , SrcShape , 8 , true >
-{
-  enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) };
-};
-
-template< class DstShape , class SrcShape >
-struct ShapeCompatible< DstShape , SrcShape , 7 , true >
-{
-  enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
-                 unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
-};
-
-template< class DstShape , class SrcShape >
-struct ShapeCompatible< DstShape , SrcShape , 6 , true >
-{
-  enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
-                 unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
-                 unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
-};
-
-template< class DstShape , class SrcShape >
-struct ShapeCompatible< DstShape , SrcShape , 5 , true >
-{
-  enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
-                 unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
-                 unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
-                 unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
-};
-
-template< class DstShape , class SrcShape >
-struct ShapeCompatible< DstShape , SrcShape , 4 , true >
-{
-  enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
-                 unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
-                 unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
-                 unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
-                 unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
-};
-
-template< class DstShape , class SrcShape >
-struct ShapeCompatible< DstShape , SrcShape , 3 , true >
-{
-  enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
-                 unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
-                 unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
-                 unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
-                 unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
-                 unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
-};
-
-template< class DstShape , class SrcShape >
-struct ShapeCompatible< DstShape , SrcShape , 2 , true >
-{
-  enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
-                 unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
-                 unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
-                 unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
-                 unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
-                 unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
-                 unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
-};
-
-template< class DstShape , class SrcShape >
-struct ShapeCompatible< DstShape , SrcShape , 1 , true >
-{
-  enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
-                 unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
-                 unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
-                 unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
-                 unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
-                 unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
-                 unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
-                 unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
-};
-
-template< class DstShape , class SrcShape >
-struct ShapeCompatible< DstShape , SrcShape , 0 , true >
-{
-  enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
-                 unsigned(DstShape::N0) == unsigned(SrcShape::N0) &&
-                 unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
-                 unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
-                 unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
-                 unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
-                 unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
-                 unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
-                 unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
-};
-
-} /* namespace Impl */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template< unsigned ScalarSize , unsigned Rank ,
-          unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
-          unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 ,
-          typename iType >
-KOKKOS_INLINE_FUNCTION
-size_t dimension( 
-  const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ,
-  const iType & r )
-{
-  return 0 == r ? shape.N0 : (
-         1 == r ? shape.N1 : (
-         2 == r ? shape.N2 : (
-         3 == r ? shape.N3 : (
-         4 == r ? shape.N4 : (
-         5 == r ? shape.N5 : (
-         6 == r ? shape.N6 : (
-         7 == r ? shape.N7 : 1 )))))));
-}
-
-template< unsigned ScalarSize , unsigned Rank ,
-          unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
-          unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 >
-KOKKOS_INLINE_FUNCTION
-size_t cardinality_count(
-  const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape )
-{
-  return size_t(shape.N0) * shape.N1 * shape.N2 * shape.N3 *
-         shape.N4 * shape.N5 * shape.N6 * shape.N7 ;
-}
-
-//----------------------------------------------------------------------------
-
-} /* namespace Impl */
-} /* namespace Kokkos */
-
-#endif /* #ifndef KOKKOS_CORESHAPE_HPP */
-
diff --git a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp
similarity index 85%
rename from lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp
rename to lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp
index 96b37043455e51d726e1d22e4f3e450986acae01..1ae51742e0a2cbe437abf17c7dedb8658c3e3e94 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp
@@ -44,7 +44,6 @@
 #include <Kokkos_Core.hpp>
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 int SharedAllocationRecord< void , void >::s_tracking_enabled = 1 ;
@@ -62,7 +61,7 @@ void SharedAllocationRecord< void , void >::tracking_release_and_enable()
   // now release and enable tracking.
 
   if ( ! Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 0, 1 ) ){
-    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" );
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" );
   }
 }
 
@@ -98,10 +97,10 @@ if ( ! ok ) {
   const char * format_string;
   
   if (sizeof(uintptr_t) == sizeof(unsigned long)) {
-     format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
+     format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
   }
   else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
-     format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
+     format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
   }
 
   fprintf(stderr
@@ -119,7 +118,7 @@ if ( ! ok ) {
     }
 
     if ( zero != Kokkos::atomic_exchange( & root->m_next , root_next ) ) {
-      Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane unlocking");
+      Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed is_sane unlocking");
     }
   }
 
@@ -145,7 +144,7 @@ SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * con
   if ( r == arg_root ) { r = 0 ; }
 
   if ( zero != Kokkos::atomic_exchange( & arg_root->m_next , root_next ) ) {
-    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking");
   }
 
   return r ;
@@ -190,11 +189,11 @@ SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root
     Kokkos::memory_fence();
 
     if ( zero != Kokkos::atomic_exchange( & m_root->m_next , this ) ) {
-      Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
+      Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking");
     }
   }
   else {
-    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord given NULL allocation");
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord given NULL allocation");
   }
 }
 
@@ -205,7 +204,7 @@ increment( SharedAllocationRecord< void , void > * arg_record )
   const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , 1 );
 
   if ( old_count < 0 ) { // Error
-    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed increment");
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed increment");
   }
 }
 
@@ -219,7 +218,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
 
 #if 0
   if ( old_count <= 1 ) {
-    fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' at 0x%lx delete count = %d\n", arg_record->m_alloc_ptr->m_label , (unsigned long) arg_record , old_count );
+    fprintf(stderr,"Kokkos::Impl::SharedAllocationRecord '%s' at 0x%lx delete count = %d\n", arg_record->m_alloc_ptr->m_label , (unsigned long) arg_record , old_count );
     fflush(stderr);
   }
 #endif
@@ -251,7 +250,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
 
     // Unlock the list:
     if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) {
-      Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement unlocking");
+      Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed decrement unlocking");
     }
 
     arg_record->m_next = 0 ;
@@ -262,9 +261,9 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
     arg_record = 0 ;
   }
   else if ( old_count < 1 ) { // Error
-    fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' failed decrement count = %d\n", arg_record->m_alloc_ptr->m_label , old_count );
+    fprintf(stderr,"Kokkos::Impl::SharedAllocationRecord '%s' failed decrement count = %d\n", arg_record->m_alloc_ptr->m_label , old_count );
     fflush(stderr);
-    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement count");
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed decrement count");
   }
 
   return arg_record ;
@@ -340,7 +339,6 @@ print_host_accessible_records( std::ostream & s
 }
 
 } /* namespace Impl */
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 
diff --git a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp
similarity index 96%
rename from lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp
rename to lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp
index 1498eafb008ffa5d26a84094df9ba3f48126551e..a9c2d6f22a7638c33431575723d549194eec79af 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp
@@ -48,7 +48,6 @@
 #include <string>
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 template< class MemorySpace = void , class DestroyFunctor = void >
@@ -109,6 +108,7 @@ protected:
                         );
 
 public:
+  inline std::string get_label() const { return std::string("Unmanaged"); }
 
   static int tracking_enabled() { return s_tracking_enabled ; }
 
@@ -209,7 +209,7 @@ private:
                         , const size_t        arg_alloc
                         )
     /*  Allocate user memory as [ SharedAllocationHeader , user_memory ] */
-    : SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Experimental::Impl::deallocate< MemorySpace , DestroyFunctor > )
+    : SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Impl::deallocate< MemorySpace , DestroyFunctor > )
     , m_destroy()
     {}
 
@@ -238,6 +238,9 @@ public:
     }
 };
 
+template< class MemorySpace >
+class SharedAllocationRecord<MemorySpace,void> : public SharedAllocationRecord< void , void > {};
+
 union SharedAllocationTracker {
 private:
 
@@ -297,9 +300,9 @@ public:
   template< class MemorySpace >
   std::string get_label() const
     {
-      return ( m_record_bits & DO_NOT_DEREF_FLAG )
+      return ( m_record_bits == DO_NOT_DEREF_FLAG )
              ? std::string()
-             : static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record )->get_label()
+             : reinterpret_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record_bits & ~DO_NOT_DEREF_FLAG )->get_label()
              ;
     }
 
@@ -394,7 +397,6 @@ public:
 
 
 } /* namespace Impl */
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 #endif
diff --git a/lib/kokkos/core/src/impl/Kokkos_Tags.hpp b/lib/kokkos/core/src/impl/Kokkos_Tags.hpp
index 0bc2864ff1d9079f47ec4369f25388794aa52f71..9545e7e6b33a919ca925d00160e9c0fbed433b9c 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Tags.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Tags.hpp
@@ -61,34 +61,15 @@
  *   struct Foo { using array_layout = void; };
  *   have_array_layout<Foo>::value == 1;
  */
-#define KOKKOS_HAVE_TYPE( Type )                                                \
-template <typename T>                                                           \
-struct have_##Type {                                                            \
-  template <typename U> static std::false_type have_type(...);                  \
-  template <typename U> static std::true_type  have_type( typename U::Type* );  \
-  using type = decltype(have_type<T>(nullptr));                                 \
-  static constexpr bool value = type::value;                                    \
-}
-
-/** KOKKOS_IS_CONCEPT( Concept )
- *
- * defines a meta-function that check if a type match the given Kokkos concept
- * type alias which matches Type
- *
- * e.g.
- *   KOKKOS_IS_CONCEPT( array_layout );
- *   struct Foo { using array_layout = Foo; };
- *   is_array_layout<Foo>::value == 1;
- */
-#define KOKKOS_IS_CONCEPT( Concept )                                            \
-template <typename T>                                                           \
-struct is_##Concept {                                                           \
-  template <typename U> static std::false_type have_concept(...);               \
-  template <typename U> static auto have_concept( typename U::Concept* )        \
-                          ->typename std::is_same<T, typename U::Concept>::type;\
-  using type = decltype(have_concept<T>(nullptr));                              \
-  static constexpr bool value = type::value;                                    \
-}
+#define KOKKOS_HAVE_TYPE( TYPE ) /* defines have_TYPE<T>; value is true iff T::TYPE names a type */ \
+template <typename T> struct have_ ## TYPE { \
+private: \
+  template <typename U, typename = void > struct X : std::false_type {}; /* fallback: U has no nested TYPE */ \
+  template <typename U> struct X<U,typename std::conditional<true,void,typename U:: TYPE >::type > : std::true_type {}; /* SFINAE: viable only when U::TYPE is a type */ \
+public: \
+  typedef typename X<T>::type type ; \
+  enum : bool { value = type::value }; \
+};
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
@@ -98,101 +79,11 @@ namespace Kokkos { namespace Impl {
 template <typename T>
 using is_void = std::is_same<void,T>;
 
-// is_memory_space<T>::value
-KOKKOS_IS_CONCEPT( memory_space );
-
-// is_memory_traits<T>::value
-KOKKOS_IS_CONCEPT( memory_traits );
-
-// is_execution_space<T>::value
-KOKKOS_IS_CONCEPT( execution_space );
-
-// is_execution_policy<T>::value
-KOKKOS_IS_CONCEPT( execution_policy );
-
-// is_array_layout<T>::value
-KOKKOS_IS_CONCEPT( array_layout );
-
-// is_iteration_pattern<T>::value
-KOKKOS_IS_CONCEPT( iteration_pattern );
-
-// is_schedule_type<T>::value
-KOKKOS_IS_CONCEPT( schedule_type );
-
-// is_index_type<T>::value
-KOKKOS_IS_CONCEPT( index_type );
-
 }} // namespace Kokkos::Impl
 
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-namespace Kokkos {
-
-template< class ExecutionSpace , class MemorySpace >
-struct Device {
-  static_assert( Impl::is_execution_space<ExecutionSpace>::value
-               , "Execution space is not valid" );
-  static_assert( Impl::is_memory_space<MemorySpace>::value
-               , "Memory space is not valid" );
-  typedef ExecutionSpace execution_space;
-  typedef MemorySpace memory_space;
-  typedef Device<execution_space,memory_space> device_type;
-};
-}
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template< class C , class Enable = void >
-struct is_space : public Impl::false_type {};
-
-template< class C >
-struct is_space< C
-                 , typename Impl::enable_if<(
-                     Impl::is_same< C , typename C::execution_space >::value ||
-                     Impl::is_same< C , typename C::memory_space    >::value ||
-                     Impl::is_same< C , Device<
-                                             typename C::execution_space,
-                                             typename C::memory_space> >::value
-                   )>::type
-                 >
-  : public Impl::true_type
-{
-  typedef typename C::execution_space  execution_space ;
-  typedef typename C::memory_space     memory_space ;
-
-  // The host_memory_space defines a space with host-resident memory.
-  // If the execution space's memory space is host accessible then use that execution space.
-  // else use the HostSpace.
-  typedef
-      typename Impl::if_c< Impl::is_same< memory_space , HostSpace >::value
-#ifdef KOKKOS_HAVE_CUDA
-                        || Impl::is_same< memory_space , CudaUVMSpace>::value
-                        || Impl::is_same< memory_space , CudaHostPinnedSpace>::value
-#endif
-                          , memory_space , HostSpace >::type
-      host_memory_space ;
-
-  // The host_execution_space defines a space which has access to HostSpace.
-  // If the execution space can access HostSpace then use that execution space.
-  // else use the DefaultHostExecutionSpace.
-#ifdef KOKKOS_HAVE_CUDA
-  typedef
-      typename Impl::if_c< Impl::is_same< execution_space , Cuda >::value
-                          , DefaultHostExecutionSpace , execution_space >::type
-      host_execution_space ;
-#else
-  typedef execution_space host_execution_space;
 #endif
 
-  typedef Device<host_execution_space,host_memory_space> host_mirror_space;
-};
-}
-}
-
-#endif
diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp
index 663bb1985d3636e84e236660b1c58fda5579cccc..ee9c69e9227a8127426845ef5a563636137fb279 100644
--- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp
@@ -46,7 +46,7 @@
 #ifndef KOKKOS_IMPL_TASKQUEUE_HPP
 #define KOKKOS_IMPL_TASKQUEUE_HPP
 
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_ENABLE_TASKDAG )
 
 #include <string>
 #include <typeinfo>
@@ -55,19 +55,29 @@
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-namespace Kokkos {
-
-template< typename > class TaskPolicy ;
-
-template< typename Arg1 = void , typename Arg2 = void > class Future ;
-
-} /* namespace Kokkos */
-
 namespace Kokkos {
 namespace Impl {
 
-template< typename , typename , typename > class TaskBase ;
-template< typename > class TaskExec ;
+/** \brief  Implementation data for task data management, access, and execution.
+ *
+ *  Curiously recurring template pattern (CRTP)
+ *  to allow static_cast from the
+ *  task root type and a task's FunctorType.
+ *
+ *    TaskBase< Space , ResultType , FunctorType >
+ *      : TaskBase< Space , ResultType , void >
+ *      , FunctorType
+ *      { ... };
+ *
+ *    TaskBase< Space , ResultType , void >
+ *      : TaskBase< Space , void , void >
+ *      { ... };
+ */
+template< typename Space , typename ResultType , typename FunctorType >
+class TaskBase ;
+
+template< typename Space >
+class TaskExec ;
 
 } /* namespace Impl */
 } /* namespace Kokkos */
@@ -91,7 +101,7 @@ class TaskQueue {
 private:
 
   friend class TaskQueueSpecialization< ExecSpace > ;
-  friend class Kokkos::TaskPolicy< ExecSpace > ;
+  friend class Kokkos::TaskScheduler< ExecSpace > ;
 
   using execution_space = ExecSpace ;
   using specialization  = TaskQueueSpecialization< execution_space > ;
@@ -201,7 +211,7 @@ public:
 #endif
 
       if ( *lhs ) decrement( *lhs );
-      if ( rhs ) { Kokkos::atomic_fetch_add( &(rhs->m_ref_count) , 1 ); }
+      if ( rhs ) { Kokkos::atomic_increment( &(rhs->m_ref_count) ); }
 
       // Force write of *lhs
 
@@ -326,7 +336,7 @@ public:
   using execution_space = ExecSpace ;
   using queue_type      = TaskQueue< execution_space > ;
 
-  template< typename > friend class Kokkos::TaskPolicy ;
+  template< typename > friend class Kokkos::TaskScheduler ;
 
   typedef void (* function_type) ( TaskBase * , void * );
 
@@ -494,6 +504,6 @@ public:
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
 #endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */
 
diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp
index 70a880d4a2e341a9f8e78df97c57531ca53492f6..05fd06a9ade307c475ca0d127ed8bb1171f8bbf3 100644
--- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp
@@ -41,7 +41,7 @@
 //@HEADER
 */
 
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
+#if defined( KOKKOS_ENABLE_TASKDAG )
 
 namespace Kokkos {
 namespace Impl {
@@ -67,6 +67,7 @@ TaskQueue< ExecSpace >::TaskQueue
             , arg_memory_pool_superblock_capacity_log2 )
   , m_ready()
   , m_accum_alloc(0)
+  , m_count_alloc(0)
   , m_max_alloc(0)
   , m_ready_count(0)
 {
@@ -122,7 +123,7 @@ void TaskQueue< ExecSpace >::decrement
     task->m_queue->deallocate( task , task->m_alloc_size );
   }   
   else if ( count <= 1 ) { 
-    Kokkos::abort("TaskPolicy task has negative reference count or is incomplete" );
+    Kokkos::abort("TaskScheduler task has negative reference count or is incomplete" );
   }   
 }
 
@@ -565,5 +566,5 @@ void TaskQueue< ExecSpace >::complete
 } /* namespace Kokkos */
 
 
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
 
diff --git a/lib/kokkos/core/src/impl/Kokkos_Timer.hpp b/lib/kokkos/core/src/impl/Kokkos_Timer.hpp
index 1f14e42874bda3c43f5f18bced120d73366abd40..293e395b88489f9cb63aa4c9717d8dc45ea2a56e 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Timer.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Timer.hpp
@@ -44,74 +44,19 @@
 #ifndef KOKKOS_IMPLWALLTIME_HPP
 #define KOKKOS_IMPLWALLTIME_HPP
 
-#include <stddef.h>
-
-#ifdef _MSC_VER
-#undef KOKKOS_USE_LIBRT
-#include <gettimeofday.c>
-#else
-#ifdef KOKKOS_USE_LIBRT
-#include <ctime>
-#else
-#include <sys/time.h>
-#endif
-#endif
+#include <Kokkos_Timer.hpp>
 
 namespace Kokkos {
 namespace Impl {
 
-/** \brief  Time since construction */
-
-class Timer {
-private:
-  #ifdef KOKKOS_USE_LIBRT
-	struct timespec m_old;
-  #else
-	struct timeval m_old ;
-  #endif
-  Timer( const Timer & );
-  Timer & operator = ( const Timer & );
-public:
-
-  inline
-  void reset() {
-    #ifdef KOKKOS_USE_LIBRT
-	  clock_gettime(CLOCK_REALTIME, &m_old);
-    #else
-	  gettimeofday( & m_old , ((struct timezone *) NULL ) );
-    #endif
-  }
-
-  inline
-  ~Timer() {}
-
-  inline
-  Timer() { reset(); }
+/** \brief  Time since construction.
+ *   Timer has been promoted from the Kokkos::Impl namespace to Kokkos.
+ *   This file is retained for backwards compatibility only.
+ */
 
-  inline
-  double seconds() const
-  {
-    #ifdef KOKKOS_USE_LIBRT
-      struct timespec m_new;
-      clock_gettime(CLOCK_REALTIME, &m_new);
-
-      return ( (double) ( m_new.tv_sec  - m_old.tv_sec ) ) +
-             ( (double) ( m_new.tv_nsec - m_old.tv_nsec ) * 1.0e-9 );
-    #else
-      struct timeval m_new ;
-
-      ::gettimeofday( & m_new , ((struct timezone *) NULL ) );
-
-      return ( (double) ( m_new.tv_sec  - m_old.tv_sec ) ) +
-             ( (double) ( m_new.tv_usec - m_old.tv_usec ) * 1.0e-6 );
-    #endif
-  }
-};
+  using Kokkos::Timer ;
 
 } // namespace Impl
-
-  using Kokkos::Impl::Timer ;
-
 } // namespace Kokkos
 
 #endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */
diff --git a/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d66fdd9a57a7c6b4b7184c72c3f7595fca62a0c8
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp
@@ -0,0 +1,414 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CORE_IMPL_UTILITIES_HPP
+#define KOKKOS_CORE_IMPL_UTILITIES_HPP
+
+#include <Kokkos_Macros.hpp>
+#include <type_traits>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos { namespace Impl {
+
+// same as std::forward
+// needed to allow perfect forwarding on the device
+template <typename T>
+KOKKOS_INLINE_FUNCTION
+constexpr
+T&& forward( typename std::remove_reference<T>::type& arg ) noexcept
+{ return static_cast<T&&>(arg); }
+
+template <typename T>
+KOKKOS_INLINE_FUNCTION
+constexpr
+T&& forward( typename std::remove_reference<T>::type&& arg ) noexcept
+{ return static_cast<T&&>(arg); }
+
+// same as std::move
+// needed to allow moving on the device
+template <typename T>
+KOKKOS_INLINE_FUNCTION
+constexpr
+typename std::remove_reference<T>::type&& move( T&& arg ) noexcept
+{ return static_cast<typename std::remove_reference<T>::type&&>(arg); }
+
+// empty function to allow expanding a variadic argument pack
+template<typename... Args>
+KOKKOS_INLINE_FUNCTION
+void expand_variadic(Args &&...) {}
+
+//----------------------------------------
+// C++14 integer sequence
+template< typename T , T ... Ints >
+struct integer_sequence {
+  using value_type = T ;
+  static constexpr std::size_t size() noexcept { return sizeof...(Ints); }
+};
+
+template< typename T , std::size_t N >
+struct make_integer_sequence_helper ;
+
+template< typename T , T N >
+using make_integer_sequence =
+  typename make_integer_sequence_helper<T,N>::type ;
+
+template< typename T >
+struct make_integer_sequence_helper< T , 0 >
+{ using type = integer_sequence<T> ; };
+
+template< typename T >
+struct make_integer_sequence_helper< T , 1 >
+{ using type = integer_sequence<T,0> ; };
+
+template< typename T >
+struct make_integer_sequence_helper< T , 2 >
+{ using type = integer_sequence<T,0,1> ; };
+
+template< typename T >
+struct make_integer_sequence_helper< T , 3 >
+{ using type = integer_sequence<T,0,1,2> ; };
+
+template< typename T >
+struct make_integer_sequence_helper< T , 4 >
+{ using type = integer_sequence<T,0,1,2,3> ; };
+
+template< typename T >
+struct make_integer_sequence_helper< T , 5 >
+{ using type = integer_sequence<T,0,1,2,3,4> ; };
+
+template< typename T >
+struct make_integer_sequence_helper< T , 6 >
+{ using type = integer_sequence<T,0,1,2,3,4,5> ; };
+
+template< typename T >
+struct make_integer_sequence_helper< T , 7 >
+{ using type = integer_sequence<T,0,1,2,3,4,5,6> ; };
+
+template< typename T >
+struct make_integer_sequence_helper< T , 8 >
+{ using type = integer_sequence<T,0,1,2,3,4,5,6,7> ; };
+
+template< typename X , typename Y >
+struct make_integer_sequence_concat ;
+
+template< typename T , T ... x , T ... y >
+struct make_integer_sequence_concat< integer_sequence<T,x...>
+                                   , integer_sequence<T,y...> >
+{ using type = integer_sequence< T , x ... , (sizeof...(x)+y)... > ; };
+
+template< typename T , std::size_t N >
+struct make_integer_sequence_helper {
+  using type = typename make_integer_sequence_concat
+    < typename make_integer_sequence_helper< T , N/2 >::type
+    , typename make_integer_sequence_helper< T , N - N/2 >::type
+    >::type ;
+};
+
+//----------------------------------------
+
+template <std::size_t... Indices>
+using index_sequence = integer_sequence<std::size_t, Indices...>;
+
+template< std::size_t N >
+using make_index_sequence = make_integer_sequence< std::size_t, N>;
+
+//----------------------------------------
+
+template <unsigned I, typename IntegerSequence>
+struct integer_sequence_at;
+
+template <unsigned I, typename T, T h0, T... tail>
+struct integer_sequence_at<I, integer_sequence<T, h0, tail...> >
+  : public integer_sequence_at<I-1u, integer_sequence<T,tail...> >
+{
+  static_assert( 8 <= I , "Reasoning Error" );
+  static_assert( I < integer_sequence<T, h0, tail...>::size(), "Error: Index out of bounds");
+};
+
+template < typename T, T h0, T... tail>
+struct integer_sequence_at<0u, integer_sequence<T,h0, tail...> >
+{
+  using type = T;
+  static constexpr T value = h0;
+};
+
+template < typename T, T h0, T h1, T... tail>
+struct integer_sequence_at<1u, integer_sequence<T, h0, h1, tail...> >
+{
+  using type = T;
+  static constexpr T value = h1;
+};
+
+template < typename T, T h0, T h1, T h2, T... tail>
+struct integer_sequence_at<2u, integer_sequence<T, h0, h1, h2, tail...> >
+{
+  using type = T;
+  static constexpr T value = h2;
+};
+
+template < typename T, T h0, T h1, T h2, T h3, T... tail>
+struct integer_sequence_at<3u, integer_sequence<T, h0, h1, h2, h3, tail...> >
+{
+  using type = T;
+  static constexpr T value = h3;
+};
+
+template < typename T, T h0, T h1, T h2, T h3, T h4, T... tail>
+struct integer_sequence_at<4u, integer_sequence<T, h0, h1, h2, h3, h4, tail...> >
+{
+  using type = T;
+  static constexpr T value = h4;
+};
+
+template < typename T, T h0, T h1, T h2, T h3, T h4, T h5, T... tail>
+struct integer_sequence_at<5u, integer_sequence<T, h0, h1, h2, h3, h4, h5, tail...> >
+{
+  using type = T;
+  static constexpr T value = h5;
+};
+
+template < typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T... tail>
+struct integer_sequence_at<6u, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, tail...> >
+{
+  using type = T;
+  static constexpr T value = h6;
+};
+
+template < typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T h7, T... tail>
+struct integer_sequence_at<7u, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, h7, tail...> >
+{
+  using type = T;
+  static constexpr T value = h7;
+};
+
+//----------------------------------------
+
+template <typename T>
+constexpr
+T at( const unsigned, integer_sequence<T> ) noexcept
+{ return ~static_cast<T>(0); }
+
+template <typename T, T h0>  // overload for a sequence holding exactly one value
+constexpr
+T at( const unsigned i, integer_sequence<T, h0> ) noexcept
+{ return i==0u ? h0 : ~static_cast<T>(0); }  // out-of-range index yields ~T(0)
+
+template <typename T, T h0, T h1>
+constexpr
+T at( const unsigned i, integer_sequence<T, h0, h1> ) noexcept
+{ return i==0u ? h0 :
+         i==1u ? h1 : ~static_cast<T>(0);
+}
+
+template <typename T, T h0, T h1, T h2>
+constexpr
+T at( const unsigned i, integer_sequence<T, h0, h1, h2> ) noexcept
+{ return i==0u ? h0 :
+         i==1u ? h1 :
+         i==2u ? h2 : ~static_cast<T>(0);
+}
+
+template <typename T, T h0, T h1, T h2, T h3>
+constexpr
+T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3> ) noexcept
+{ return i==0u ? h0 :
+         i==1u ? h1 :
+         i==2u ? h2 :
+         i==3u ? h3 : ~static_cast<T>(0);
+}
+
+template <typename T, T h0, T h1, T h2, T h3, T h4>
+constexpr
+T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4> ) noexcept
+{ return i==0u ? h0 :
+         i==1u ? h1 :
+         i==2u ? h2 :
+         i==3u ? h3 :
+         i==4u ? h4 : ~static_cast<T>(0);
+}
+
+template <typename T, T h0, T h1, T h2, T h3, T h4, T h5>
+constexpr
+T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4, h5> ) noexcept
+{ return i==0u ? h0 :
+         i==1u ? h1 :
+         i==2u ? h2 :
+         i==3u ? h3 :
+         i==4u ? h4 :
+         i==5u ? h5 : ~static_cast<T>(0);
+}
+
+template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6>
+constexpr
+T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6> ) noexcept
+{ return i==0u ? h0 :
+         i==1u ? h1 :
+         i==2u ? h2 :
+         i==3u ? h3 :
+         i==4u ? h4 :
+         i==5u ? h5 :
+         i==6u ? h6 : ~static_cast<T>(0);
+}
+
+template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T h7, T... tail>
+constexpr
+T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, h7, tail...> ) noexcept
+{ return i==0u ? h0 :
+         i==1u ? h1 :
+         i==2u ? h2 :
+         i==3u ? h3 :
+         i==4u ? h4 :
+         i==5u ? h5 :
+         i==6u ? h6 :
+         i==7u ? h7 : at(i-8u, integer_sequence<T, tail...>{} );
+}
+
+//----------------------------------------
+
+
+template < typename IntegerSequence
+         , typename ResultSequence = integer_sequence<typename IntegerSequence::value_type>
+         >
+struct reverse_integer_sequence_helper;
+
+template <typename T, T h0, T... tail, T... results>
+struct reverse_integer_sequence_helper< integer_sequence<T, h0, tail...>, integer_sequence<T, results...> >
+  : public reverse_integer_sequence_helper< integer_sequence<T, tail...>, integer_sequence<T, h0, results...> >
+{};
+
+template <typename T, T... results>
+struct reverse_integer_sequence_helper< integer_sequence<T>, integer_sequence<T, results...> >
+{
+  using type = integer_sequence<T, results...>;
+};
+
+
+template <typename IntegerSequence>
+using reverse_integer_sequence = typename reverse_integer_sequence_helper<IntegerSequence>::type;
+
+//----------------------------------------
+
+template < typename IntegerSequence
+         , typename Result
+         , typename ResultSequence = integer_sequence<typename IntegerSequence::value_type>
+         >
+struct exclusive_scan_integer_sequence_helper;
+
+template <typename T, T h0, T... tail, typename Result, T... results>
+struct exclusive_scan_integer_sequence_helper
+  < integer_sequence<T, h0, tail...>
+  , Result
+  , integer_sequence<T, results...> >
+  : public exclusive_scan_integer_sequence_helper
+     < integer_sequence<T, tail...>
+     , std::integral_constant<T,Result::value+h0>
+     , integer_sequence<T, 0, (results+h0)...> >
+{};
+
+template <typename T, typename Result, T... results>
+struct exclusive_scan_integer_sequence_helper
+  < integer_sequence<T>, Result, integer_sequence<T, results...> >
+{
+  using type = integer_sequence<T, results...>;
+  static constexpr T value = Result::value ;
+};
+
+template <typename IntegerSequence>
+struct exclusive_scan_integer_sequence
+{
+  using value_type = typename IntegerSequence::value_type;
+  using helper = 
+    exclusive_scan_integer_sequence_helper
+       < reverse_integer_sequence<IntegerSequence>
+       , std::integral_constant< value_type , 0 >
+       > ;
+  using type = typename helper::type ;
+  static constexpr value_type value  = helper::value ;
+};
+
+//----------------------------------------
+
+template < typename IntegerSequence
+         , typename Result
+         , typename ResultSequence = integer_sequence<typename IntegerSequence::value_type>
+         >
+struct inclusive_scan_integer_sequence_helper;
+
+template <typename T, T h0, T... tail, typename Result, T... results>
+struct inclusive_scan_integer_sequence_helper
+  < integer_sequence<T, h0, tail...>
+  , Result
+  , integer_sequence<T, results...> >
+  : public inclusive_scan_integer_sequence_helper
+     < integer_sequence<T, tail...>
+     , std::integral_constant<T,Result::value+h0>
+     , integer_sequence<T, h0, (results+h0)...> >
+{};
+
+template <typename T, typename Result, T... results>
+struct inclusive_scan_integer_sequence_helper
+  < integer_sequence<T>, Result, integer_sequence<T, results...> >
+{
+  using type = integer_sequence<T, results...>;
+  static constexpr T value = Result::value ;
+};
+
+template <typename IntegerSequence>
+struct inclusive_scan_integer_sequence
+{
+  using value_type = typename IntegerSequence::value_type;
+  using helper = 
+    inclusive_scan_integer_sequence_helper
+       < reverse_integer_sequence<IntegerSequence>
+       , std::integral_constant< value_type , 0 >
+       > ;
+  using type = typename helper::type ;
+  static constexpr value_type value  = helper::value ;
+};
+
+}} // namespace Kokkos::Impl
+
+
+#endif // KOKKOS_CORE_IMPL_UTILITIES_HPP
diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp
similarity index 96%
rename from lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp
rename to lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp
index 17d28ace4dae471accfa91ab52629aee357850e9..c55636b64ea8331ae2a2d66fc2479b727cbf5115 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp
@@ -116,7 +116,7 @@ class ViewMapping< Traits ,
 private:
 
   template< class , class ... > friend class ViewMapping ;
-  template< class , class ... > friend class Kokkos::Experimental::View ;
+  template< class , class ... > friend class Kokkos::View ;
 
   typedef ViewOffset< typename Traits::dimension
                     , typename Traits::array_layout
@@ -301,17 +301,17 @@ public:
   //----------------------------------------
 
   template< class ... P >
-  SharedAllocationRecord<> *
-  allocate_shared( ViewCtorProp< P... > const & arg_prop
+  Kokkos::Impl::SharedAllocationRecord<> *
+  allocate_shared( Kokkos::Impl::ViewCtorProp< P... > const & arg_prop
                  , typename Traits::array_layout const & arg_layout
                  )
   {
-    typedef ViewCtorProp< P... > alloc_prop ;
+    typedef Kokkos::Impl::ViewCtorProp< P... > alloc_prop ;
 
     typedef typename alloc_prop::execution_space  execution_space ;
     typedef typename Traits::memory_space         memory_space ;
     typedef ViewValueFunctor< execution_space , scalar_type > functor_type ;
-    typedef SharedAllocationRecord< memory_space , functor_type > record_type ;
+    typedef Kokkos::Impl::SharedAllocationRecord< memory_space , functor_type > record_type ;
 
     // Query the mapping for byte-size of allocation.
     typedef std::integral_constant< unsigned ,
@@ -324,8 +324,8 @@ public:
 
     // Allocate memory from the memory space and create tracking record.
     record_type * const record =
-      record_type::allocate( ((ViewCtorProp<void,memory_space> const &) arg_prop ).value
-                           , ((ViewCtorProp<void,std::string>  const &) arg_prop ).value
+      record_type::allocate( ((Kokkos::Impl::ViewCtorProp<void,memory_space> const &) arg_prop ).value
+                           , ((Kokkos::Impl::ViewCtorProp<void,std::string>  const &) arg_prop ).value
                            , alloc_size );
 
     if ( alloc_size ) {
@@ -334,7 +334,7 @@ public:
 
       if ( alloc_prop::initialize ) {
         // The functor constructs and destroys
-        record->m_destroy = functor_type( ((ViewCtorProp<void,execution_space> const & )arg_prop).value
+        record->m_destroy = functor_type( ((Kokkos::Impl::ViewCtorProp<void,execution_space> const & )arg_prop).value
                                         , (pointer_type) m_handle
                                         , m_offset.span() * Array_N
                                         );
@@ -377,7 +377,7 @@ public:
 
   enum { is_assignable = true };
 
-  typedef Kokkos::Experimental::Impl::SharedAllocationTracker  TrackType ;
+  typedef Kokkos::Impl::SharedAllocationTracker  TrackType ;
   typedef ViewMapping< DstTraits , void >  DstType ;
   typedef ViewMapping< SrcTraits , void >  SrcType ;
 
@@ -436,7 +436,7 @@ public:
   enum { is_assignable = std::is_same< typename DstTraits::data_type ,    typename SrcTraits::scalar_array_type >::value &&
                          std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value };
 
-  typedef Kokkos::Experimental::Impl::SharedAllocationTracker  TrackType ;
+  typedef Kokkos::Impl::SharedAllocationTracker  TrackType ;
   typedef ViewMapping< DstTraits , void >  DstType ;
   typedef ViewMapping< SrcTraits , void >  SrcType ;
 
@@ -558,13 +558,13 @@ private:
 
 public:
 
-  typedef Kokkos::Experimental::ViewTraits
+  typedef Kokkos::ViewTraits
     < data_type
     , array_layout
     , typename SrcTraits::device_type
     , typename SrcTraits::memory_traits > traits_type ;
 
-  typedef Kokkos::Experimental::View
+  typedef Kokkos::View
     < data_type
     , array_layout
     , typename SrcTraits::device_type
diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp
similarity index 99%
rename from lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp
rename to lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp
index 6525fed0a5ceb5995db3517b84fec6f7985e6d54..6381aee468c9ee114c5c050e20565c2a8e52b127 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp
@@ -70,7 +70,6 @@ struct ViewAllocateWithoutInitializing {
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 struct WithoutInitializing_t {};
@@ -242,7 +241,6 @@ public:
 };
 
 } /* namespace Impl */
-} /* namespace Experimental */
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp
deleted file mode 100644
index 94c8e13c1d445953fabc852aaece3fa8d07fa5eb..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp
+++ /dev/null
@@ -1,886 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_VIEWDEFAULT_HPP
-#define KOKKOS_VIEWDEFAULT_HPP
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template<>
-struct ViewAssignment< ViewDefault , ViewDefault , void >
-{
-  typedef ViewDefault Specialize ;
-
-  //------------------------------------
-  /** \brief  Compatible value and shape and LayoutLeft/Right to LayoutStride*/
-
-  template< class DT , class DL , class DD , class DM ,
-            class ST , class SL , class SD , class SM >
-  KOKKOS_INLINE_FUNCTION
-  ViewAssignment(       View<DT,DL,DD,DM,Specialize> & dst ,
-                  const View<ST,SL,SD,SM,Specialize> & src ,
-                  const typename enable_if<(
-                    ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
-                                    ViewTraits<ST,SL,SD,SM> >::value
-                    ||
-                    ( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
-                                      ViewTraits<ST,SL,SD,SM> >::assignable_value
-                      &&
-                      ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
-                                       typename ViewTraits<ST,SL,SD,SM>::shape_type >::value
-                      &&
-                      is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutStride>::value
-                      && (is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutLeft>::value ||
-                          is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutRight>::value))
-                  )>::type * = 0 )
-  {
-    dst.m_offset_map.assign( src.m_offset_map );
-
-    dst.m_management = src.m_management ;
-
-    dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
-
-    if( dst.is_managed )
-      dst.m_tracker = src.m_tracker ;
-    else {
-      dst.m_tracker = AllocationTracker();
-      dst.m_management.set_unmanaged();
-    }
-  }
-
-
-  /** \brief  Assign 1D Strided View to LayoutLeft or LayoutRight if stride[0]==1 */
-
-  template< class DT , class DL , class DD , class DM ,
-            class ST , class SD , class SM >
-  KOKKOS_INLINE_FUNCTION
-  ViewAssignment(       View<DT,DL,DD,DM,Specialize> & dst ,
-                  const View<ST,LayoutStride,SD,SM,Specialize> & src ,
-                  const typename enable_if<(
-                    (
-                      ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
-                                    ViewTraits<ST,LayoutStride,SD,SM> >::value
-                      ||
-                      ( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
-                                      ViewTraits<ST,LayoutStride,SD,SM> >::assignable_value
-                        &&
-                        ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
-                                       typename ViewTraits<ST,LayoutStride,SD,SM>::shape_type >::value
-                      )
-                     )
-                     &&
-                      (View<DT,DL,DD,DM,Specialize>::rank==1)
-                     && (is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutLeft>::value ||
-                          is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutRight>::value)
-                  )>::type * = 0 )
-  {
-    size_t strides[8];
-    src.stride(strides);
-    if(strides[0]!=1) {
-      Kokkos::abort("Trying to assign strided 1D View to LayoutRight or LayoutLeft which is not stride-1");
-    }
-    dst.m_offset_map.assign( src.dimension_0(), 0, 0, 0, 0, 0, 0, 0, 0 );
-
-    dst.m_management = src.m_management ;
-
-    dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
-
-    if( dst.is_managed )
-      dst.m_tracker = src.m_tracker ;
-    else {
-      dst.m_tracker = AllocationTracker();
-      dst.m_management.set_unmanaged();
-    }
-  }
-
-  //------------------------------------
-  /** \brief  Deep copy data from compatible value type, layout, rank, and specialization.
-   *          Check the dimensions and allocation lengths at runtime.
-   */
-  template< class DT , class DL , class DD , class DM ,
-            class ST , class SL , class SD , class SM >
-  inline static
-  void deep_copy( const View<DT,DL,DD,DM,Specialize> & dst ,
-                  const View<ST,SL,SD,SM,Specialize> & src ,
-                  const typename Impl::enable_if<(
-                    Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type ,
-                                   typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value
-                    &&
-                    Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout ,
-                                   typename ViewTraits<ST,SL,SD,SM>::array_layout >::value
-                    &&
-                    ( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) )
-                  )>::type * = 0 )
-  {
-    typedef typename ViewTraits<DT,DL,DD,DM>::memory_space dst_memory_space ;
-    typedef typename ViewTraits<ST,SL,SD,SM>::memory_space src_memory_space ;
-
-    if ( dst.ptr_on_device() != src.ptr_on_device() ) {
-
-      Impl::assert_shapes_are_equal( dst.m_offset_map , src.m_offset_map );
-
-      const size_t nbytes = dst.m_offset_map.scalar_size * dst.m_offset_map.capacity();
-
-      DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes );
-    }
-  }
-};
-
-} /* namespace Impl */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template< class ExecSpace , class DT , class DL, class DD, class DM, class DS >
-struct ViewDefaultConstruct< ExecSpace , Kokkos::View<DT,DL,DD,DM,DS> , true >
-{
-  Kokkos::View<DT,DL,DD,DM,DS> * const m_ptr ;
-
-  KOKKOS_FORCEINLINE_FUNCTION
-  void operator()( const typename ExecSpace::size_type& i ) const
-    { new(m_ptr+i) Kokkos::View<DT,DL,DD,DM,DS>(); }
-
-  ViewDefaultConstruct( Kokkos::View<DT,DL,DD,DM,DS> * pointer , size_t capacity )
-    : m_ptr( pointer )
-    {
-      Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
-      parallel_for( range , *this );
-      ExecSpace::fence();
-    }
-};
-
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
-        , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
-        >
-struct ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-                  , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
-                  , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
-{
-private:
-
-  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >  SrcViewType ;
-
-  enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
-  enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
-  enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
-  enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
-  enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
-  enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
-  enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
-  enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
-
-  // The source view rank must be equal to the input argument rank
-  // Once a void argument is encountered all subsequent arguments must be void.
-  enum { InputRank =
-    Impl::StaticAssert<( SrcViewType::rank ==
-                         ( V0 ? 0 : (
-                           V1 ? 1 : (
-                           V2 ? 2 : (
-                           V3 ? 3 : (
-                           V4 ? 4 : (
-                           V5 ? 5 : (
-                           V6 ? 6 : (
-                           V7 ? 7 : 8 ))))))) ))
-                       &&
-                       ( SrcViewType::rank ==
-                         ( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
-    >::value ? SrcViewType::rank : 0 };
-
-  enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
-  enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
-  enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
-  enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
-  enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
-  enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
-  enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
-  enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
-
-  enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
-                    + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
-
-  // Reverse
-  enum { R0_rev = 0 == InputRank ? 0u : (
-                  1 == InputRank ? unsigned(R0) : (
-                  2 == InputRank ? unsigned(R1) : (
-                  3 == InputRank ? unsigned(R2) : (
-                  4 == InputRank ? unsigned(R3) : (
-                  5 == InputRank ? unsigned(R4) : (
-                  6 == InputRank ? unsigned(R5) : (
-                  7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
-
-  typedef typename SrcViewType::array_layout  SrcViewLayout ;
-
-  // Choose array layout, attempting to preserve original layout if at all possible.
-  typedef typename Impl::if_c<
-     ( // Same Layout IF
-       // OutputRank 0
-       ( OutputRank == 0 )
-       ||
-       // OutputRank 1 or 2, InputLayout Left, Interval 0
-       // because single stride one or second index has a stride.
-       ( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
-       ||
-       // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
-       // because single stride one or second index has a stride.
-       ( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
-     ), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
-
-  // Choose data type as a purely dynamic rank array to accomodate a runtime range.
-  typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
-          typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
-          typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
-          typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
-          typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
-          typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
-          typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
-          typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
-                                                 typename SrcViewType::value_type ********
-  >::type >::type >::type >::type >::type >::type >::type >::type  OutputData ;
-
-  // Choose space.
-  // If the source view's template arg1 or arg2 is a space then use it,
-  // otherwise use the source view's execution space.
-
-  typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
-          typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::device_type
-  >::type >::type OutputSpace ;
-
-public:
-
-  // If keeping the layout then match non-data type arguments
-  // else keep execution space and memory traits.
-  typedef typename
-    Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
-              , Kokkos::View< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-              , Kokkos::View< OutputData , OutputViewLayout , OutputSpace
-                            , typename SrcViewType::memory_traits
-                            , Impl::ViewDefault >
-              >::type  type ;
-};
-
-} /* namespace Impl */
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-
-// Construct subview of a Rank 8 view
-template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
-        , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
-        >
-KOKKOS_INLINE_FUNCTION
-View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
-View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
-    , const SubArg0_type & arg0
-    , const SubArg1_type & arg1
-    , const SubArg2_type & arg2
-    , const SubArg3_type & arg3
-    , const SubArg4_type & arg4
-    , const SubArg5_type & arg5
-    , const SubArg6_type & arg6
-    , const SubArg7_type & arg7
-    )
-  : m_ptr_on_device( (typename traits::value_type*) NULL)
-  , m_offset_map()
-  , m_management()
-  , m_tracker()
-{
-  // This constructor can only be used to construct a subview
-  // from the source view.  This type must match the subview type
-  // deduced from the source view and subview arguments.
-
-  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
-                           , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
-    ViewSubviewDeduction ;
-
-  enum { is_a_valid_subview_constructor =
-    Impl::StaticAssert<
-      Impl::is_same< View , typename ViewSubviewDeduction::type >::value
-    >::value
-  };
-
-  if ( is_a_valid_subview_constructor ) {
-
-    typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
-    typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
-    typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
-    typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
-    typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
-    typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
-    typedef Impl::ViewOffsetRange< SubArg6_type > R6 ;
-    typedef Impl::ViewOffsetRange< SubArg7_type > R7 ;
-
-    // 'assign_subview' returns whether the subview offset_map
-    // introduces noncontiguity in the view.
-    const bool introduce_noncontiguity =
-      m_offset_map.assign_subview( src.m_offset_map
-                                 , R0::dimension( src.m_offset_map.N0 , arg0 )
-                                 , R1::dimension( src.m_offset_map.N1 , arg1 )
-                                 , R2::dimension( src.m_offset_map.N2 , arg2 )
-                                 , R3::dimension( src.m_offset_map.N3 , arg3 )
-                                 , R4::dimension( src.m_offset_map.N4 , arg4 )
-                                 , R5::dimension( src.m_offset_map.N5 , arg5 )
-                                 , R6::dimension( src.m_offset_map.N6 , arg6 )
-                                 , R7::dimension( src.m_offset_map.N7 , arg7 )
-                                 );
-
-    if ( m_offset_map.capacity() ) {
-
-      m_management = src.m_management ;
-
-      if ( introduce_noncontiguity ) m_management.set_noncontiguous();
-
-      m_ptr_on_device = src.m_ptr_on_device +
-                        src.m_offset_map( R0::begin( arg0 )
-                                        , R1::begin( arg1 )
-                                        , R2::begin( arg2 )
-                                        , R3::begin( arg3 )
-                                        , R4::begin( arg4 )
-                                        , R5::begin( arg5 )
-                                        , R6::begin( arg6 )
-                                        , R7::begin( arg7 ) );
-      m_tracker = src.m_tracker ;
-    }
-  }
-}
-
-// Construct subview of a Rank 7 view
-template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
-        , class SubArg4_type , class SubArg5_type , class SubArg6_type
-        >
-KOKKOS_INLINE_FUNCTION
-View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
-View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
-    , const SubArg0_type & arg0
-    , const SubArg1_type & arg1
-    , const SubArg2_type & arg2
-    , const SubArg3_type & arg3
-    , const SubArg4_type & arg4
-    , const SubArg5_type & arg5
-    , const SubArg6_type & arg6
-    )
-  : m_ptr_on_device( (typename traits::value_type*) NULL)
-  , m_offset_map()
-  , m_management()
-  , m_tracker()
-{
-  // This constructor can only be used to construct a subview
-  // from the source view.  This type must match the subview type
-  // deduced from the source view and subview arguments.
-
-  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
-                           , SubArg4_type , SubArg5_type , SubArg6_type , void >
-    ViewSubviewDeduction ;
-
-  enum { is_a_valid_subview_constructor =
-    Impl::StaticAssert<
-      Impl::is_same< View , typename ViewSubviewDeduction::type >::value
-    >::value
-  };
-
-  if ( is_a_valid_subview_constructor ) {
-
-    typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
-    typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
-    typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
-    typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
-    typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
-    typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
-    typedef Impl::ViewOffsetRange< SubArg6_type > R6 ;
-
-    // 'assign_subview' returns whether the subview offset_map
-    // introduces noncontiguity in the view.
-    const bool introduce_noncontiguity =
-      m_offset_map.assign_subview( src.m_offset_map
-                                 , R0::dimension( src.m_offset_map.N0 , arg0 )
-                                 , R1::dimension( src.m_offset_map.N1 , arg1 )
-                                 , R2::dimension( src.m_offset_map.N2 , arg2 )
-                                 , R3::dimension( src.m_offset_map.N3 , arg3 )
-                                 , R4::dimension( src.m_offset_map.N4 , arg4 )
-                                 , R5::dimension( src.m_offset_map.N5 , arg5 )
-                                 , R6::dimension( src.m_offset_map.N6 , arg6 )
-                                 , 0
-                                 );
-
-    if ( m_offset_map.capacity() ) {
-
-      m_management = src.m_management ;
-
-      if ( introduce_noncontiguity ) m_management.set_noncontiguous();
-
-      m_ptr_on_device = src.m_ptr_on_device +
-                        src.m_offset_map( R0::begin( arg0 )
-                                        , R1::begin( arg1 )
-                                        , R2::begin( arg2 )
-                                        , R3::begin( arg3 )
-                                        , R4::begin( arg4 )
-                                        , R5::begin( arg5 )
-                                        , R6::begin( arg6 )
-                                        );
-      m_tracker = src.m_tracker ;
-    }
-  }
-}
-
-// Construct subview of a Rank 6 view
-template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
-        , class SubArg4_type , class SubArg5_type
-        >
-KOKKOS_INLINE_FUNCTION
-View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
-View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
-    , const SubArg0_type & arg0
-    , const SubArg1_type & arg1
-    , const SubArg2_type & arg2
-    , const SubArg3_type & arg3
-    , const SubArg4_type & arg4
-    , const SubArg5_type & arg5
-    )
-  : m_ptr_on_device( (typename traits::value_type*) NULL)
-  , m_offset_map()
-  , m_management()
-  , m_tracker()
-{
-  // This constructor can only be used to construct a subview
-  // from the source view.  This type must match the subview type
-  // deduced from the source view and subview arguments.
-
-  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
-                           , SubArg4_type , SubArg5_type , void , void >
-    ViewSubviewDeduction ;
-
-  enum { is_a_valid_subview_constructor =
-    Impl::StaticAssert<
-      Impl::is_same< View , typename ViewSubviewDeduction::type >::value
-    >::value
-  };
-
-  if ( is_a_valid_subview_constructor ) {
-
-    typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
-    typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
-    typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
-    typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
-    typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
-    typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
-
-    // 'assign_subview' returns whether the subview offset_map
-    // introduces noncontiguity in the view.
-    const bool introduce_noncontiguity =
-      m_offset_map.assign_subview( src.m_offset_map
-                                 , R0::dimension( src.m_offset_map.N0 , arg0 )
-                                 , R1::dimension( src.m_offset_map.N1 , arg1 )
-                                 , R2::dimension( src.m_offset_map.N2 , arg2 )
-                                 , R3::dimension( src.m_offset_map.N3 , arg3 )
-                                 , R4::dimension( src.m_offset_map.N4 , arg4 )
-                                 , R5::dimension( src.m_offset_map.N5 , arg5 )
-                                 , 0
-                                 , 0
-                                 );
-
-    if ( m_offset_map.capacity() ) {
-
-      m_management = src.m_management ;
-
-      if ( introduce_noncontiguity ) m_management.set_noncontiguous();
-
-      m_ptr_on_device = src.m_ptr_on_device +
-                        src.m_offset_map( R0::begin( arg0 )
-                                        , R1::begin( arg1 )
-                                        , R2::begin( arg2 )
-                                        , R3::begin( arg3 )
-                                        , R4::begin( arg4 )
-                                        , R5::begin( arg5 )
-                                        );
-      m_tracker = src.m_tracker ;
-    }
-  }
-}
-
-// Construct subview of a Rank 5 view
-template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
-        , class SubArg4_type
-        >
-KOKKOS_INLINE_FUNCTION
-View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
-View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
-    , const SubArg0_type & arg0
-    , const SubArg1_type & arg1
-    , const SubArg2_type & arg2
-    , const SubArg3_type & arg3
-    , const SubArg4_type & arg4
-    )
-  : m_ptr_on_device( (typename traits::value_type*) NULL)
-  , m_offset_map()
-  , m_management()
-  , m_tracker()
-{
-  // This constructor can only be used to construct a subview
-  // from the source view.  This type must match the subview type
-  // deduced from the source view and subview arguments.
-
-  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
-                           , SubArg4_type , void , void , void >
-    ViewSubviewDeduction ;
-
-  enum { is_a_valid_subview_constructor =
-    Impl::StaticAssert<
-      Impl::is_same< View , typename ViewSubviewDeduction::type >::value
-    >::value
-  };
-
-  if ( is_a_valid_subview_constructor ) {
-
-    typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
-    typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
-    typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
-    typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
-    typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
-
-    // 'assign_subview' returns whether the subview offset_map
-    // introduces noncontiguity in the view.
-    const bool introduce_noncontiguity =
-      m_offset_map.assign_subview( src.m_offset_map
-                                 , R0::dimension( src.m_offset_map.N0 , arg0 )
-                                 , R1::dimension( src.m_offset_map.N1 , arg1 )
-                                 , R2::dimension( src.m_offset_map.N2 , arg2 )
-                                 , R3::dimension( src.m_offset_map.N3 , arg3 )
-                                 , R4::dimension( src.m_offset_map.N4 , arg4 )
-                                 , 0
-                                 , 0
-                                 , 0
-                                 );
-
-    if ( m_offset_map.capacity() ) {
-
-      m_management = src.m_management ;
-
-      if ( introduce_noncontiguity ) m_management.set_noncontiguous();
-
-      m_ptr_on_device = src.m_ptr_on_device +
-                        src.m_offset_map( R0::begin( arg0 )
-                                        , R1::begin( arg1 )
-                                        , R2::begin( arg2 )
-                                        , R3::begin( arg3 )
-                                        , R4::begin( arg4 )
-                                        );
-      m_tracker = src.m_tracker ;
-    }
-  }
-}
-
-// Construct subview of a Rank 4 view
-template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
-        >
-KOKKOS_INLINE_FUNCTION
-View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
-View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
-    , const SubArg0_type & arg0
-    , const SubArg1_type & arg1
-    , const SubArg2_type & arg2
-    , const SubArg3_type & arg3
-    )
-  : m_ptr_on_device( (typename traits::value_type*) NULL)
-  , m_offset_map()
-  , m_management()
-  , m_tracker()
-{
-  // This constructor can only be used to construct a subview
-  // from the source view.  This type must match the subview type
-  // deduced from the source view and subview arguments.
-
-  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
-                           , void , void , void , void >
-    ViewSubviewDeduction ;
-
-  enum { is_a_valid_subview_constructor =
-    Impl::StaticAssert<
-      Impl::is_same< View , typename ViewSubviewDeduction::type >::value
-    >::value
-  };
-
-  if ( is_a_valid_subview_constructor ) {
-
-    typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
-    typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
-    typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
-    typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
-
-    // 'assign_subview' returns whether the subview offset_map
-    // introduces noncontiguity in the view.
-    const bool introduce_noncontiguity =
-      m_offset_map.assign_subview( src.m_offset_map
-                                 , R0::dimension( src.m_offset_map.N0 , arg0 )
-                                 , R1::dimension( src.m_offset_map.N1 , arg1 )
-                                 , R2::dimension( src.m_offset_map.N2 , arg2 )
-                                 , R3::dimension( src.m_offset_map.N3 , arg3 )
-                                 , 0
-                                 , 0
-                                 , 0
-                                 , 0
-                                 );
-
-    if ( m_offset_map.capacity() ) {
-
-      m_management = src.m_management ;
-
-      if ( introduce_noncontiguity ) m_management.set_noncontiguous();
-
-      m_ptr_on_device = src.m_ptr_on_device +
-                        src.m_offset_map( R0::begin( arg0 )
-                                        , R1::begin( arg1 )
-                                        , R2::begin( arg2 )
-                                        , R3::begin( arg3 )
-                                        );
-      m_tracker = src.m_tracker ;
-    }
-  }
-}
-
-// Construct subview of a Rank 3 view
-template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type , class SubArg1_type , class SubArg2_type
-        >
-KOKKOS_INLINE_FUNCTION
-View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
-View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
-    , const SubArg0_type & arg0
-    , const SubArg1_type & arg1
-    , const SubArg2_type & arg2
-    )
-  : m_ptr_on_device( (typename traits::value_type*) NULL)
-  , m_offset_map()
-  , m_management()
-  , m_tracker()
-{
-  // This constructor can only be used to construct a subview
-  // from the source view.  This type must match the subview type
-  // deduced from the source view and subview arguments.
-
-  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-                           , SubArg0_type , SubArg1_type , SubArg2_type , void , void , void , void , void >
-    ViewSubviewDeduction ;
-
-  enum { is_a_valid_subview_constructor =
-    Impl::StaticAssert<
-      Impl::is_same< View , typename ViewSubviewDeduction::type >::value
-    >::value
-  };
-
-  if ( is_a_valid_subview_constructor ) {
-
-    typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
-    typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
-    typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
-
-    // 'assign_subview' returns whether the subview offset_map
-    // introduces noncontiguity in the view.
-    const bool introduce_noncontiguity =
-      m_offset_map.assign_subview( src.m_offset_map
-                                 , R0::dimension( src.m_offset_map.N0 , arg0 )
-                                 , R1::dimension( src.m_offset_map.N1 , arg1 )
-                                 , R2::dimension( src.m_offset_map.N2 , arg2 )
-                                 , 0 , 0 , 0 , 0 , 0);
-
-    if ( m_offset_map.capacity() ) {
-
-      m_management = src.m_management ;
-
-      if ( introduce_noncontiguity ) m_management.set_noncontiguous();
-
-      m_ptr_on_device = src.m_ptr_on_device +
-                        src.m_offset_map( R0::begin( arg0 )
-                                        , R1::begin( arg1 )
-                                        , R2::begin( arg2 )
-                                        );
-      m_tracker = src.m_tracker ;
-    }
-  }
-}
-
-// Construct subview of a Rank 2 view
-template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type , class SubArg1_type
-        >
-KOKKOS_INLINE_FUNCTION
-View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
-View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
-    , const SubArg0_type & arg0
-    , const SubArg1_type & arg1
-    )
-  : m_ptr_on_device( (typename traits::value_type*) NULL)
-  , m_offset_map()
-  , m_management()
-  , m_tracker()
-{
-  // This constructor can only be used to construct a subview
-  // from the source view.  This type must match the subview type
-  // deduced from the source view and subview arguments.
-
-  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-                           , SubArg0_type , SubArg1_type , void , void , void , void , void , void >
-    ViewSubviewDeduction ;
-
-  enum { is_a_valid_subview_constructor =
-    Impl::StaticAssert<
-      Impl::is_same< View , typename ViewSubviewDeduction::type >::value
-    >::value
-  };
-
-  if ( is_a_valid_subview_constructor ) {
-
-    typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
-    typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
-
-    // 'assign_subview' returns whether the subview offset_map
-    // introduces noncontiguity in the view.
-    const bool introduce_noncontiguity =
-      m_offset_map.assign_subview( src.m_offset_map
-                                 , R0::dimension( src.m_offset_map.N0 , arg0 )
-                                 , R1::dimension( src.m_offset_map.N1 , arg1 )
-                                 , 0 , 0 , 0 , 0 , 0 , 0 );
-
-    if ( m_offset_map.capacity() ) {
-
-      m_management = src.m_management ;
-
-      if ( introduce_noncontiguity ) m_management.set_noncontiguous();
-
-      m_ptr_on_device = src.m_ptr_on_device +
-                        src.m_offset_map( R0::begin( arg0 )
-                                        , R1::begin( arg1 )
-                                        );
-      m_tracker = src.m_tracker ;
-    }
-  }
-}
-
-// Construct subview of a Rank 1 view
-template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
-template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
-        , class SubArg0_type
-        >
-KOKKOS_INLINE_FUNCTION
-View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
-View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
-    , const SubArg0_type & arg0
-    )
-  : m_ptr_on_device( (typename traits::value_type*) NULL)
-  , m_offset_map()
-  , m_management()
-  , m_tracker()
-{
-  // This constructor can only be used to construct a subview
-  // from the source view.  This type must match the subview type
-  // deduced from the source view and subview arguments.
-
-  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
-                           , SubArg0_type , void , void , void , void , void , void , void >
-    ViewSubviewDeduction ;
-
-  enum { is_a_valid_subview_constructor =
-    Impl::StaticAssert<
-      Impl::is_same< View , typename ViewSubviewDeduction::type >::value
-    >::value
-  };
-
-  if ( is_a_valid_subview_constructor ) {
-
-    typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
-
-    // 'assign_subview' returns whether the subview offset_map
-    // introduces noncontiguity in the view.
-    const bool introduce_noncontiguity =
-      m_offset_map.assign_subview( src.m_offset_map
-                                 , R0::dimension( src.m_offset_map.N0 , arg0 )
-                                 , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
-
-    if ( m_offset_map.capacity() ) {
-
-      m_management = src.m_management ;
-
-      if ( introduce_noncontiguity ) m_management.set_noncontiguous();
-
-      m_ptr_on_device = src.m_ptr_on_device +
-                        src.m_offset_map( R0::begin( arg0 )
-                                        );
-      m_tracker = src.m_tracker ;
-    }
-  }
-}
-
-} /* namespace Kokkos */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif /* #ifndef KOKKOS_VIEWDEFAULT_HPP */
-
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..588166c1855402851b40d38e5fdb98cd585c7e00
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
@@ -0,0 +1,3156 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP
+#define KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP
+
+#include <type_traits>
+#include <initializer_list>
+
+#include <Kokkos_Core_fwd.hpp>
+#include <Kokkos_Pair.hpp>
+#include <Kokkos_Layout.hpp>
+#include <impl/Kokkos_Error.hpp>
+#include <impl/Kokkos_Traits.hpp>
+#include <impl/Kokkos_ViewCtor.hpp>
+#include <impl/Kokkos_Atomic_View.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template< unsigned I , size_t ... Args >
+struct variadic_size_t // value = the I-th entry of Args...
+  { enum { value = ~size_t(0) }; }; // primary: pack ran out before index I, yield the all-ones sentinel
+
+template< size_t Val , size_t ... Args >
+struct variadic_size_t< 0 , Val , Args ... > // index reached: take the head of the pack
+  { enum { value = Val }; };
+
+template< unsigned I , size_t Val , size_t ... Args >
+struct variadic_size_t< I , Val , Args ... > // drop the head and continue with I - 1
+  { enum { value = variadic_size_t< I - 1 , Args ... >::value }; };
+
+template< size_t ... Args >
+struct rank_dynamic ; // value = number of entries of Args... that are 0
+
+template<>
+struct rank_dynamic<> { enum { value = 0 }; }; // empty pack: nothing to count
+
+template< size_t Val , size_t ... Args >
+struct rank_dynamic< Val , Args... >
+{
+  enum { value = ( Val == 0 ? 1 : 0 ) + rank_dynamic< Args... >::value }; // head adds 1 when it is 0, then recurse on the tail
+};
+
+#define KOKKOS_IMPL_VIEW_DIMENSION( R ) /* defines ViewDimensionR: holder of the extent for slot R */ \
+  template< size_t V , unsigned > struct ViewDimension ## R \
+    { /* primary: V is not 0, the extent is an enum constant and nothing is stored */ \
+      enum { ArgN ## R = ( V != ~size_t(0) ? V : 1 ) }; \
+      enum { N ## R = ( V != ~size_t(0) ? V : 1 ) }; /* the ~size_t(0) sentinel is mapped to extent 1 */ \
+      KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t ) {} /* argument is ignored */ \
+      ViewDimension ## R () = default ; \
+      ViewDimension ## R ( const ViewDimension ## R  & ) = default ; \
+      ViewDimension ## R & operator = ( const ViewDimension ## R  & ) = default ; \
+    }; \
+  template< unsigned RD > struct ViewDimension ## R < 0 , RD > \
+    { /* V == 0: the extent is a data member set by the constructor */ \
+      enum { ArgN ## R = 0 }; \
+      typename std::conditional<( RD < 3 ), size_t , unsigned >::type N ## R ; /* size_t when RD < 3, otherwise unsigned */ \
+      ViewDimension ## R () = default ; \
+      ViewDimension ## R ( const ViewDimension ## R  & ) = default ; \
+      ViewDimension ## R & operator = ( const ViewDimension ## R  & ) = default ; \
+      KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t V ) : N ## R ( V ) {} \
+    };
+
+KOKKOS_IMPL_VIEW_DIMENSION( 0 ) // one holder type per slot, ViewDimension0 .. ViewDimension7
+KOKKOS_IMPL_VIEW_DIMENSION( 1 )
+KOKKOS_IMPL_VIEW_DIMENSION( 2 )
+KOKKOS_IMPL_VIEW_DIMENSION( 3 )
+KOKKOS_IMPL_VIEW_DIMENSION( 4 )
+KOKKOS_IMPL_VIEW_DIMENSION( 5 )
+KOKKOS_IMPL_VIEW_DIMENSION( 6 )
+KOKKOS_IMPL_VIEW_DIMENSION( 7 )
+
+#undef KOKKOS_IMPL_VIEW_DIMENSION
+
+template< size_t ... Vals >
+struct ViewDimension // combines the eight per-slot holders; slot k is instantiated with the k-th entry of Vals...
+  : public ViewDimension0< variadic_size_t<0,Vals...>::value 
+                         , rank_dynamic< Vals... >::value >
+  , public ViewDimension1< variadic_size_t<1,Vals...>::value 
+                         , rank_dynamic< Vals... >::value >
+  , public ViewDimension2< variadic_size_t<2,Vals...>::value 
+                         , rank_dynamic< Vals... >::value >
+  , public ViewDimension3< variadic_size_t<3,Vals...>::value 
+                         , rank_dynamic< Vals... >::value >
+  , public ViewDimension4< variadic_size_t<4,Vals...>::value 
+                         , rank_dynamic< Vals... >::value >
+  , public ViewDimension5< variadic_size_t<5,Vals...>::value 
+                         , rank_dynamic< Vals... >::value >
+  , public ViewDimension6< variadic_size_t<6,Vals...>::value 
+                         , rank_dynamic< Vals... >::value >
+  , public ViewDimension7< variadic_size_t<7,Vals...>::value 
+                         , rank_dynamic< Vals... >::value >
+{
+  typedef ViewDimension0< variadic_size_t<0,Vals...>::value // D0 .. D7 are short names for the eight base classes
+                        , rank_dynamic< Vals... >::value > D0 ;
+  typedef ViewDimension1< variadic_size_t<1,Vals...>::value 
+                        , rank_dynamic< Vals... >::value > D1 ;
+  typedef ViewDimension2< variadic_size_t<2,Vals...>::value 
+                        , rank_dynamic< Vals... >::value > D2 ;
+  typedef ViewDimension3< variadic_size_t<3,Vals...>::value 
+                        , rank_dynamic< Vals... >::value > D3 ;
+  typedef ViewDimension4< variadic_size_t<4,Vals...>::value 
+                        , rank_dynamic< Vals... >::value > D4 ;
+  typedef ViewDimension5< variadic_size_t<5,Vals...>::value 
+                        , rank_dynamic< Vals... >::value > D5 ;
+  typedef ViewDimension6< variadic_size_t<6,Vals...>::value 
+                        , rank_dynamic< Vals... >::value > D6 ;
+  typedef ViewDimension7< variadic_size_t<7,Vals...>::value 
+                        , rank_dynamic< Vals... >::value > D7 ;
+
+  using D0::ArgN0 ; // ArgNk: 0 for a slot whose entry is 0, 1 for a slot past the end of Vals..., else the entry itself
+  using D1::ArgN1 ;
+  using D2::ArgN2 ;
+  using D3::ArgN3 ;
+  using D4::ArgN4 ;
+  using D5::ArgN5 ;
+  using D6::ArgN6 ;
+  using D7::ArgN7 ;
+
+  using D0::N0 ; // Nk: extent of slot k, an enum constant or a data member depending on the base specialization
+  using D1::N1 ;
+  using D2::N2 ;
+  using D3::N3 ;
+  using D4::N4 ;
+  using D5::N5 ;
+  using D6::N6 ;
+  using D7::N7 ;
+
+  enum { rank = sizeof...(Vals) }; // number of entries in Vals...
+  enum { rank_dynamic = Impl::rank_dynamic< Vals... >::value }; // number of entries equal to 0
+
+  ViewDimension() = default ;
+  ViewDimension( const ViewDimension & ) = default ;
+  ViewDimension & operator = ( const ViewDimension & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr
+  ViewDimension( size_t n0 , size_t n1 , size_t n2 , size_t n3 // nk is handed to base Dk
+               , size_t n4 , size_t n5 , size_t n6 , size_t n7 ) // bases with a compile-time extent ignore their argument
+    : D0( n0 )
+    , D1( n1 )
+    , D2( n2 )
+    , D3( n3 )
+    , D4( n4 )
+    , D5( n5 )
+    , D6( n6 )
+    , D7( n7 )
+    {}
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_t extent( const unsigned r ) const // extent of slot r; 0 when r is greater than 7
+    {
+      return r == 0 ? N0 : (
+             r == 1 ? N1 : (
+             r == 2 ? N2 : (
+             r == 3 ? N3 : (
+             r == 4 ? N4 : (
+             r == 5 ? N5 : (
+             r == 6 ? N6 : (
+             r == 7 ? N7 : 0 )))))));
+    }
+
+  template< size_t N >
+  struct prepend { typedef ViewDimension< N , Vals... > type ; }; // same entries with N added in front
+
+  template< size_t N >
+  struct append { typedef ViewDimension< Vals... , N > type ; }; // same entries with N added at the end
+};
+
+template< class A , class B >
+struct ViewDimensionJoin ; // primary is declared only; defined for a pair of ViewDimension types
+
+template< size_t ... A , size_t ... B >
+struct ViewDimensionJoin< ViewDimension< A... > , ViewDimension< B... > > {
+  typedef ViewDimension< A... , B... > type ; // entries of the first type followed by the entries of the second
+};
+
+//----------------------------------------------------------------------------
+
+template< class DstDim , class SrcDim >
+struct ViewDimensionAssignable ; // primary is declared only; defined for a pair of ViewDimension types
+
+template< size_t ... DstArgs , size_t ... SrcArgs >
+struct ViewDimensionAssignable< ViewDimension< DstArgs ... >
+                              , ViewDimension< SrcArgs ... > >
+{
+  typedef ViewDimension< DstArgs... > dst ;
+  typedef ViewDimension< SrcArgs... > src ;
+
+  enum { value = // true when the ranks are equal and every compared ArgNk pair matches
+    unsigned(dst::rank) == unsigned(src::rank) && (
+      //Compile time check that potential static dimensions match; slot k is compared only when k+1 exceeds both rank_dynamic values
+      ( ( 1 > dst::rank_dynamic && 1 > src::rank_dynamic ) ? (size_t(dst::ArgN0) == size_t(src::ArgN0)) : true ) &&
+      ( ( 2 > dst::rank_dynamic && 2 > src::rank_dynamic ) ? (size_t(dst::ArgN1) == size_t(src::ArgN1)) : true ) &&
+      ( ( 3 > dst::rank_dynamic && 3 > src::rank_dynamic ) ? (size_t(dst::ArgN2) == size_t(src::ArgN2)) : true ) &&
+      ( ( 4 > dst::rank_dynamic && 4 > src::rank_dynamic ) ? (size_t(dst::ArgN3) == size_t(src::ArgN3)) : true ) &&
+      ( ( 5 > dst::rank_dynamic && 5 > src::rank_dynamic ) ? (size_t(dst::ArgN4) == size_t(src::ArgN4)) : true ) &&
+      ( ( 6 > dst::rank_dynamic && 6 > src::rank_dynamic ) ? (size_t(dst::ArgN5) == size_t(src::ArgN5)) : true ) &&
+      ( ( 7 > dst::rank_dynamic && 7 > src::rank_dynamic ) ? (size_t(dst::ArgN6) == size_t(src::ArgN6)) : true ) &&
+      ( ( 8 > dst::rank_dynamic && 8 > src::rank_dynamic ) ? (size_t(dst::ArgN7) == size_t(src::ArgN7)) : true )
+    )};
+
+};
+
+}}} // namespace Kokkos::Experimental::Impl
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+struct ALL_t { // empty tag type; SubviewExtents::set maps it to offset 0 and the full extent of a dimension
+  KOKKOS_INLINE_FUNCTION
+  constexpr const ALL_t & operator()() const { return *this ; } // calling the tag returns the tag itself
+};
+
+}} // namespace Kokkos::Impl
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+using Kokkos::Impl::ALL_t ;
+
+template< class T >
+struct is_integral_extent_type // value is 1 when T is a range-like subview argument, otherwise 0
+{ enum { value = std::is_same<T,Kokkos::Experimental::Impl::ALL_t>::value ? 1 : 0 }; }; // primary: only ALL_t qualifies
+
+template< class iType >
+struct is_integral_extent_type< std::pair<iType,iType> > // std::pair whose two members share one integral type
+{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; };
+
+template< class iType >
+struct is_integral_extent_type< Kokkos::pair<iType,iType> > // Kokkos::pair whose two members share one integral type
+{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; };
+
+// Assuming '2 == initializer_list<iType>::size()'; the size is not part of the type, so it cannot be tested here
+template< class iType >
+struct is_integral_extent_type< std::initializer_list<iType> >
+{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; };
+
+template < unsigned I , class ... Args >
+struct is_integral_extent // classifies the I-th type of Args...
+{
+  // get_type is void when sizeof...(Args) <= I
+  typedef typename std::remove_cv< // reference and cv-qualifiers are removed before the type is classified
+          typename std::remove_reference<
+          typename Kokkos::Impl::get_type<I,Args...
+          >::type >::type >::type type ;
+
+  enum { value = is_integral_extent_type<type>::value }; // 1 for a range-like argument, otherwise 0
+
+  static_assert( value || // any other type must be integral, or void for a missing argument
+                 std::is_integral<type>::value ||
+                 std::is_same<type,void>::value 
+               , "subview argument must be either integral or integral extent" );
+};
+
+// Rules for subview arguments and layouts matching; 'value' is a compile-time flag computed per argument position
+
+template<class LayoutDest, class LayoutSrc, int RankDest, int RankSrc, int CurrentArg, class ... SubViewArgs>
+struct SubviewLegalArgsCompileTime; // primary is declared only; the layout pairs below provide the definitions
+
+// Rules which allow LayoutLeft to LayoutLeft assignment
+
+template<int RankDest, int RankSrc, int CurrentArg, class Arg, class ... SubViewArgs> // recursive case: head Arg must pass one of the four tests, and the tail must be legal too
+struct SubviewLegalArgsCompileTime<Kokkos::LayoutLeft, Kokkos::LayoutLeft, RankDest, RankSrc, CurrentArg, Arg, SubViewArgs...> {
+  enum { value      =(((CurrentArg==RankDest-1) && (Kokkos::Experimental::Impl::is_integral_extent_type<Arg>::value)) ||
+                      ((CurrentArg>=RankDest) && (std::is_integral<Arg>::value)) ||
+                      ((CurrentArg<RankDest) && (std::is_same<Arg,Kokkos::Impl::ALL_t>::value)) ||
+                      ((CurrentArg==0) && (Kokkos::Experimental::Impl::is_integral_extent_type<Arg>::value))
+                     ) && (SubviewLegalArgsCompileTime<Kokkos::LayoutLeft, Kokkos::LayoutLeft, RankDest, RankSrc, CurrentArg+1, SubViewArgs...>::value)};
+};
+
+template<int RankDest, int RankSrc, int CurrentArg, class Arg> // last argument: must also sit at position RankSrc-1
+struct SubviewLegalArgsCompileTime<Kokkos::LayoutLeft, Kokkos::LayoutLeft, RankDest, RankSrc, CurrentArg, Arg> {
+  enum { value = ((CurrentArg==RankDest-1) || (std::is_integral<Arg>::value)) &&
+                 (CurrentArg==RankSrc-1) };
+};
+
+// Rules which allow LayoutRight to LayoutRight assignment
+
+template<int RankDest, int RankSrc, int CurrentArg, class Arg, class ... SubViewArgs> // recursive case: positions are measured against RankSrc-RankDest
+struct SubviewLegalArgsCompileTime<Kokkos::LayoutRight, Kokkos::LayoutRight, RankDest, RankSrc, CurrentArg, Arg, SubViewArgs...> {
+  enum { value      =(((CurrentArg==RankSrc-RankDest) && (Kokkos::Experimental::Impl::is_integral_extent_type<Arg>::value)) ||
+                      ((CurrentArg<RankSrc-RankDest) && (std::is_integral<Arg>::value)) ||
+                      ((CurrentArg>=RankSrc-RankDest) && (std::is_same<Arg,Kokkos::Impl::ALL_t>::value))
+                     ) && (SubviewLegalArgsCompileTime<Kokkos::LayoutRight, Kokkos::LayoutRight, RankDest, RankSrc, CurrentArg+1, SubViewArgs...>::value)};
+};
+
+template<int RankDest, int RankSrc, int CurrentArg, class Arg> // last argument: must be ALL_t at position RankSrc-1
+struct SubviewLegalArgsCompileTime<Kokkos::LayoutRight, Kokkos::LayoutRight, RankDest, RankSrc, CurrentArg, Arg> {
+  enum { value = ((CurrentArg==RankSrc-1) && (std::is_same<Arg,Kokkos::Impl::ALL_t>::value)) };
+};
+
+// Rules which allow assignment to LayoutStride: every argument list is accepted for the three source layouts below
+
+template<int RankDest, int RankSrc, int CurrentArg, class ... SubViewArgs>
+struct SubviewLegalArgsCompileTime<Kokkos::LayoutStride,Kokkos::LayoutLeft,RankDest,RankSrc,CurrentArg,SubViewArgs...> {
+  enum { value = true };
+};
+
+template<int RankDest, int RankSrc, int CurrentArg, class ... SubViewArgs>
+struct SubviewLegalArgsCompileTime<Kokkos::LayoutStride,Kokkos::LayoutRight,RankDest,RankSrc,CurrentArg,SubViewArgs...> {
+  enum { value = true };
+};
+
+template<int RankDest, int RankSrc, int CurrentArg, class ... SubViewArgs>
+struct SubviewLegalArgsCompileTime<Kokkos::LayoutStride,Kokkos::LayoutStride,RankDest,RankSrc,CurrentArg,SubViewArgs...> {
+  enum { value = true };
+};
+
+
+template< unsigned DomainRank , unsigned RangeRank >
+struct SubviewExtents {
+private:
+
+  // Cannot declare zero-length arrays
+  enum { InternalRangeRank = RangeRank ? RangeRank : 1u };
+
+  size_t   m_begin[  DomainRank ];
+  size_t   m_length[ InternalRangeRank ];
+  unsigned m_index[  InternalRangeRank ];
+
+  template< size_t ... DimArgs > // end of the recursion: no subview arguments are left
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool set( unsigned domain_rank
+          , unsigned range_rank
+          , const ViewDimension< DimArgs ... > & dim )
+    { return true ; } // nothing to record and nothing to check
+
+  template< class T , size_t ... DimArgs , class ... Args > // single index: records the offset for dimension domain_rank, adds no range dimension
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool set( unsigned domain_rank
+          , unsigned range_rank
+          , const ViewDimension< DimArgs ... > & dim
+          , const T & val
+          , Args ... args )
+    {
+      const size_t v = static_cast<size_t>(val);
+
+      m_begin[ domain_rank ] = v ;
+
+      return set( domain_rank + 1 , range_rank , dim , args... ) // range_rank is passed on unchanged
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+             && ( v < dim.extent( domain_rank ) ) // evaluated only when the remaining arguments passed
+#endif
+      ;
+    }
+
+  // ALL_t: keeps the whole dimension, so there is no bounds check for this argument
+  template< size_t ... DimArgs , class ... Args >
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool set( unsigned domain_rank
+          , unsigned range_rank
+          , const ViewDimension< DimArgs ... > & dim
+          , const Kokkos::Experimental::Impl::ALL_t 
+          , Args ... args )
+    {
+      m_begin[  domain_rank ] = 0 ;
+      m_length[ range_rank  ] = dim.extent( domain_rank );
+      m_index[  range_rank  ] = domain_rank ; // range dimension range_rank comes from domain dimension domain_rank
+
+      return set( domain_rank + 1 , range_rank + 1 , dim , args... );
+    }
+
+  // std::pair range: val.first is the begin offset, val.second the end
+  template< class T , size_t ... DimArgs , class ... Args >
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool set( unsigned domain_rank
+          , unsigned range_rank
+          , const ViewDimension< DimArgs ... > & dim
+          , const std::pair<T,T> & val
+          , Args ... args )
+    {
+      const size_t b = static_cast<size_t>( val.first );
+      const size_t e = static_cast<size_t>( val.second );
+
+      m_begin[  domain_rank ] = b ;
+      m_length[ range_rank  ] = e - b ; // unsigned subtraction, no test here that e >= b
+      m_index[  range_rank  ] = domain_rank ;
+
+      return set( domain_rank + 1 , range_rank + 1 , dim , args... )
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+             && ( e <= b + dim.extent( domain_rank ) ) // compares the length e - b with the extent; e alone is not compared
+#endif
+      ;
+    }
+
+  // Kokkos::pair range: val.first is the begin offset, val.second the end
+  template< class T , size_t ... DimArgs , class ... Args >
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool set( unsigned domain_rank
+          , unsigned range_rank
+          , const ViewDimension< DimArgs ... > & dim
+          , const Kokkos::pair<T,T> & val
+          , Args ... args )
+    {
+      const size_t b = static_cast<size_t>( val.first );
+      const size_t e = static_cast<size_t>( val.second );
+
+      m_begin[  domain_rank ] = b ;
+      m_length[ range_rank  ] = e - b ; // unsigned subtraction, no test here that e >= b
+      m_index[  range_rank  ] = domain_rank ;
+
+      return set( domain_rank + 1 , range_rank + 1 , dim , args... )
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+             && ( e <= b + dim.extent( domain_rank ) ) // compares the length e - b with the extent; e alone is not compared
+#endif
+      ;
+    }
+
+  // { begin , end } range given as an initializer list with two elements
+  template< class T , size_t ... DimArgs , class ... Args >
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool set( unsigned domain_rank
+          , unsigned range_rank
+          , const ViewDimension< DimArgs ... > & dim
+          , const std::initializer_list< T > & val
+          , Args ... args )
+    {
+      const size_t b = static_cast<size_t>( val.begin()[0] ); // both elements are read before val.size() is looked at
+      const size_t e = static_cast<size_t>( val.begin()[1] );
+
+      m_begin[  domain_rank ] = b ;
+      m_length[ range_rank  ] = e - b ; // unsigned subtraction, no test here that e >= b
+      m_index[  range_rank  ] = domain_rank ;
+
+      return set( domain_rank + 1 , range_rank + 1 , dim , args... )
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+             && ( val.size() == 2 ) // the size is verified only in bounds-check builds
+             && ( e <= b + dim.extent( domain_rank ) ) // compares the length e - b with the extent; e alone is not compared
+#endif
+      ;
+    }
+
+  //------------------------------
+
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+
+  template< size_t ... DimArgs > // end of the message recursion: no arguments left, nothing is written
+  void error( char *
+            , int
+            , unsigned
+            , unsigned
+            , const ViewDimension< DimArgs ... > & ) const
+    {}
+
+  template< class T , size_t ... DimArgs , class ... Args > // single index: writes " <val> < <extent>" for this argument into buf
+  void error( char * buf , int buf_len
+            , unsigned domain_rank
+            , unsigned range_rank
+            , const ViewDimension< DimArgs ... > & dim
+            , const T & val
+            , Args ... args ) const
+    {
+      const int n = std::min( buf_len , // n is capped at the space that was available
+        snprintf( buf , buf_len
+                , " %lu < %lu %c"
+                , static_cast<unsigned long>(val)
+                , static_cast<unsigned long>( dim.extent( domain_rank ) )
+                , int( sizeof...(Args) ? ',' : ')' ) ) ); // ',' while arguments remain, ')' after the last one
+
+      error( buf+n, buf_len-n, domain_rank + 1 , range_rank , dim , args... ); // continue behind the text just written
+    }
+
+  // ALL_t: writes " Kokkos::ALL" for this argument into buf
+  template< size_t ... DimArgs , class ... Args >
+  void error( char * buf , int buf_len
+            , unsigned domain_rank
+            , unsigned range_rank
+            , const ViewDimension< DimArgs ... > & dim
+            , const Kokkos::Experimental::Impl::ALL_t 
+            , Args ... args ) const
+    {
+      const int n = std::min( buf_len , // n is capped at the space that was available
+        snprintf( buf , buf_len
+                , " Kokkos::ALL %c" 
+                , int( sizeof...(Args) ? ',' : ')' ) ) ); // ',' while arguments remain, ')' after the last one
+
+      error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
+    }
+
+  // std::pair range: writes " <extent> <= <second> - <first>" for this argument into buf
+  template< class T , size_t ... DimArgs , class ... Args >
+  void error( char * buf , int buf_len
+            , unsigned domain_rank
+            , unsigned range_rank
+            , const ViewDimension< DimArgs ... > & dim
+            , const std::pair<T,T> & val
+            , Args ... args ) const
+    {
+      // d <= e - b ; n is capped at the space that was available in buf
+      const int n = std::min( buf_len ,
+        snprintf( buf , buf_len
+                , " %lu <= %lu - %lu %c"
+                , static_cast<unsigned long>( dim.extent( domain_rank ) )
+                , static_cast<unsigned long>( val.second )
+                , static_cast<unsigned long>( val.first ) // the begin of the range is 'first'; a pair has no 'begin' member
+                , int( sizeof...(Args) ? ',' : ')' ) ) ); // ',' while arguments remain, ')' after the last one
+
+      error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
+    }
+
+  // Kokkos::pair range: writes " <extent> <= <second> - <first>" for this argument into buf
+  template< class T , size_t ... DimArgs , class ... Args >
+  void error( char * buf , int buf_len
+            , unsigned domain_rank
+            , unsigned range_rank
+            , const ViewDimension< DimArgs ... > & dim
+            , const Kokkos::pair<T,T> & val
+            , Args ... args ) const
+    {
+      // d <= e - b ; n is capped at the space that was available in buf
+      const int n = std::min( buf_len ,
+        snprintf( buf , buf_len
+                , " %lu <= %lu - %lu %c"
+                , static_cast<unsigned long>( dim.extent( domain_rank ) )
+                , static_cast<unsigned long>( val.second )
+                , static_cast<unsigned long>( val.first ) // same member the matching set() overload reads as the begin
+                , int( sizeof...(Args) ? ',' : ')' ) ) ); // ',' while arguments remain, ')' after the last one
+
+      error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
+    }
+
+  // { begin , end } range: writes the range for this argument, or the list size when it is not 2
+  template< class T , size_t ... DimArgs , class ... Args >
+  void error( char * buf , int buf_len
+            , unsigned domain_rank
+            , unsigned range_rank
+            , const ViewDimension< DimArgs ... > & dim
+            , const std::initializer_list< T > & val
+            , Args ... args ) const
+    {
+      // d <= e - b ; elements are printed end first, then begin, like the pair overloads
+      int n = 0 ;
+      if ( val.size() == 2 ) {
+        n = std::min( buf_len ,
+          snprintf( buf , buf_len
+                  , " %lu <= %lu - %lu %c"
+                  , static_cast<unsigned long>( dim.extent( domain_rank ) )
+                  , static_cast<unsigned long>( val.begin()[1] )
+                  , static_cast<unsigned long>( val.begin()[0] )
+                  , int( sizeof...(Args) ? ',' : ')' ) ) );
+      }
+      else { // wrong element count: the elements are not read, only the size is reported
+        n = std::min( buf_len ,
+          snprintf( buf , buf_len
+                  , " { ... }.size() == %u %c"
+                  , unsigned(val.size())
+                  , int( sizeof...(Args) ? ',' : ')' ) ) );
+      }
+
+      error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
+    }
+
+  template< size_t ... DimArgs , class ... Args > // entry point used by the constructor when set() returned false
+  KOKKOS_FORCEINLINE_FUNCTION
+  void error( const ViewDimension< DimArgs ... > & dim , Args ... args ) const
+    {
+#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST )
+      enum { LEN = 1024 };
+      char buffer[ LEN ];
+
+      const int n = snprintf(buffer,LEN,"Kokkos::subview bounds error (");
+      error( buffer+n , LEN-n , 0 , 0 , dim , args... ); // appends one entry per subview argument
+
+      Kokkos::Impl::throw_runtime_exception(std::string(buffer));
+#else
+      Kokkos::abort("Kokkos::subview bounds error"); // no per-argument detail on this path
+#endif
+    }
+
+#else
+
+  template< size_t ... DimArgs , class ... Args > // variant for builds without KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
+  KOKKOS_FORCEINLINE_FUNCTION
+  void error( const ViewDimension< DimArgs ... > & , Args ... ) const {} // does nothing
+
+#endif
+
+public:
+
+  template< size_t ... DimArgs , class ... Args > // records offsets and lengths for one subview argument per source dimension
+  KOKKOS_INLINE_FUNCTION
+  SubviewExtents( const ViewDimension< DimArgs ... > & dim , Args ... args )
+    {
+      static_assert( DomainRank == sizeof...(DimArgs) , "" ); // dim must have DomainRank entries
+      static_assert( DomainRank == sizeof...(Args) , "" ); // exactly one argument per source dimension
+
+      // Verifies that all arguments, up to 8, are integral types,
+      // integral extents, or don't exist.
+      static_assert( RangeRank == // RangeRank must equal the number of range-like arguments
+        unsigned( is_integral_extent<0,Args...>::value ) +
+        unsigned( is_integral_extent<1,Args...>::value ) +
+        unsigned( is_integral_extent<2,Args...>::value ) +
+        unsigned( is_integral_extent<3,Args...>::value ) +
+        unsigned( is_integral_extent<4,Args...>::value ) +
+        unsigned( is_integral_extent<5,Args...>::value ) +
+        unsigned( is_integral_extent<6,Args...>::value ) +
+        unsigned( is_integral_extent<7,Args...>::value ) , "" );
+
+      if ( RangeRank == 0 ) { m_length[0] = 0 ; m_index[0] = ~0u ; } // fill the single placeholder slot of the arrays
+
+      if ( ! set( 0 , 0 , dim , args... ) ) error( dim , args... ); // set() can return false only in bounds-check builds
+    }
+
+  template < typename iType > // begin offset recorded for source dimension i
+  KOKKOS_FORCEINLINE_FUNCTION
+  constexpr size_t domain_offset( const iType i ) const
+    { return unsigned(i) < DomainRank ? m_begin[i] : 0 ; } // 0 when i is outside the source rank
+
+  template < typename iType > // length recorded for subview dimension i
+  KOKKOS_FORCEINLINE_FUNCTION
+  constexpr size_t range_extent( const iType i ) const
+    { return unsigned(i) < InternalRangeRank ? m_length[i] : 0 ; } // 0 when i is outside the stored array
+
+  template < typename iType > // source dimension that subview dimension i was taken from
+  KOKKOS_FORCEINLINE_FUNCTION
+  constexpr unsigned range_index( const iType i ) const
+    { return unsigned(i) < InternalRangeRank ? m_index[i] : ~0u ; } // ~0u when i is outside the stored array
+};
+
+}}} // namespace Kokkos::Experimental::Impl
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+/** \brief  Given a value type and dimension generate the View data type; a 0 entry becomes '*', a non-zero entry N becomes '[N]' */
+template< class T , class Dim >
+struct ViewDataType ;
+
+template< class T >
+struct ViewDataType< T , ViewDimension<> > // no entries left: the accumulated type is the result
+{
+  typedef T type ;
+};
+
+template< class T , size_t ... Args >
+struct ViewDataType< T , ViewDimension< 0 , Args... > > // leading 0: add one pointer level, then handle the rest
+{
+  typedef typename ViewDataType<T*,ViewDimension<Args...> >::type type ;
+};
+
+template< class T , size_t N , size_t ... Args >
+struct ViewDataType< T , ViewDimension< N , Args... > > // leading N: array of N of the type built from the rest
+{
+  typedef typename ViewDataType<T,ViewDimension<Args...> >::type type[N] ;
+};
+
+/**\brief  Analysis of View data type.
+ *
+ *  Data type conforms to one of the following patterns :
+ *    {const} value_type [][#][#][#]
+ *    {const} value_type ***[#][#][#]
+ *  Where the sum of counts of '*' and '[#]' is at most ten. (NOTE(review): ViewDimension stores only N0..N7; confirm this limit.)
+ *
+ *  Provide typedef for the ViewDimension<...> and value_type.
+ */
+template< class T >
+struct ViewArrayAnalysis // primary: T has no pointer or array part, so every dimension list is empty
+{
+  typedef T                                      value_type ;
+  typedef typename std::add_const<    T >::type  const_value_type ;
+  typedef typename std::remove_const< T >::type  non_const_value_type ;
+  typedef ViewDimension<>                        static_dimension ;
+  typedef ViewDimension<>                        dynamic_dimension ;
+  typedef ViewDimension<>                        dimension ;
+};
+
+template< class T , size_t N >
+struct ViewArrayAnalysis< T[N] > // array of N: N is added in front of the nested static entries
+{
+private:
+  typedef ViewArrayAnalysis< T > nested ; // analysis of the element type
+public:
+  typedef typename nested::value_type            value_type ;
+  typedef typename nested::const_value_type      const_value_type ;
+  typedef typename nested::non_const_value_type  non_const_value_type ;
+
+  typedef typename nested::static_dimension::template prepend<N>::type
+    static_dimension ;
+
+  typedef typename nested::dynamic_dimension dynamic_dimension ; // taken over unchanged
+
+  typedef typename
+    ViewDimensionJoin< dynamic_dimension , static_dimension >::type
+      dimension ; // dynamic entries first, static entries after them
+};
+
+template< class T >
+struct ViewArrayAnalysis< T[] > // array of unknown bound: a 0 is added in front of the nested dynamic entries
+{
+private:
+  typedef ViewArrayAnalysis< T > nested ; // analysis of the element type
+  typedef typename nested::dimension nested_dimension ; // not referenced again inside this specialization
+public:
+  typedef typename nested::value_type            value_type ;
+  typedef typename nested::const_value_type      const_value_type ;
+  typedef typename nested::non_const_value_type  non_const_value_type ;
+
+  typedef typename nested::dynamic_dimension::template prepend<0>::type
+    dynamic_dimension ;
+
+  typedef typename nested::static_dimension static_dimension ; // taken over unchanged
+
+  typedef typename
+    ViewDimensionJoin< dynamic_dimension , static_dimension >::type
+      dimension ; // dynamic entries first, static entries after them
+};
+
+template< class T >
+struct ViewArrayAnalysis< T* > // pointer: a 0 is added in front of the nested dynamic entries
+{
+private:
+  typedef ViewArrayAnalysis< T > nested ; // analysis of the pointed-to type
+public:
+  typedef typename nested::value_type            value_type ;
+  typedef typename nested::const_value_type      const_value_type ;
+  typedef typename nested::non_const_value_type  non_const_value_type ;
+
+  typedef typename nested::dynamic_dimension::template prepend<0>::type
+    dynamic_dimension ;
+
+  typedef typename nested::static_dimension static_dimension ; // taken over unchanged
+
+  typedef typename
+    ViewDimensionJoin< dynamic_dimension , static_dimension >::type
+      dimension ; // dynamic entries first, static entries after them
+};
+
+
+template< class DataType , class ArrayLayout , class ValueType >
+struct ViewDataAnalysis // default analysis: forwards the results of ViewArrayAnalysis< DataType >
+{
+private:
+
+  typedef ViewArrayAnalysis< DataType > array_analysis ;
+
+  // ValueType is opportunity for partial specialization.
+  // Must match array analysis when this default template is used.
+  static_assert( std::is_same< ValueType , typename array_analysis::non_const_value_type >::value , "" );
+
+public:
+
+  typedef void specialize ; // No specialization
+
+  typedef typename array_analysis::dimension             dimension ;
+  typedef typename array_analysis::value_type            value_type ;
+  typedef typename array_analysis::const_value_type      const_value_type ;
+  typedef typename array_analysis::non_const_value_type  non_const_value_type ;
+
+  // Generate analogous multidimensional array specification type.
+  typedef typename ViewDataType<           value_type , dimension >::type  type ;
+  typedef typename ViewDataType<     const_value_type , dimension >::type  const_type ;
+  typedef typename ViewDataType< non_const_value_type , dimension >::type  non_const_type ;
+
+  // Generate "flattened" multidimensional array specification type; in this default template they equal the types above.
+  typedef type            scalar_array_type ;
+  typedef const_type      const_scalar_array_type ;
+  typedef non_const_type  non_const_scalar_array_type ;
+};
+
+}}} // namespace Kokkos::Experimental::Impl
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template < class Dimension , class Layout , typename Enable = void >
+struct ViewOffset { // primary template, used when no specialization matches; Enable is the slot for enable_if conditions
+  using is_mapping_plugin = std::false_type ; // the specializations set this to std::true_type
+};
+
+//----------------------------------------------------------------------------
+// ViewOffset mapping for LayoutLeft AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding
+template < class Dimension >
+struct ViewOffset< Dimension , Kokkos::LayoutLeft
+                 , typename std::enable_if<( 1 >= Dimension::rank
+                                             ||
+                                             0 == Dimension::rank_dynamic
+                                           )>::type >
+{
+  using is_mapping_plugin = std::true_type ;
+  using is_regular        = std::true_type ;
+
+  typedef size_t             size_type ;
+  typedef Dimension          dimension_type ;
+  typedef Kokkos::LayoutLeft array_layout ;
+  // Extents N0..N7 are the only state; this specialization has no separate stride member.
+  dimension_type m_dim ;
+
+  //----------------------------------------
+  // Map a multi-index to a linear offset: i0 has unit stride, each later index is scaled by the product of the preceding extents.
+  // rank 1
+  template< typename I0 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 ) const { return i0 ; }
+
+  // rank 2
+  template < typename I0 , typename I1 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 , I1 const & i1 ) const
+    { return i0 + m_dim.N0 * i1 ; }
+
+  //rank 3
+  template < typename I0, typename I1, typename I2 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
+  {
+    return i0 + m_dim.N0 * ( i1 + m_dim.N1 * i2 );
+  }
+
+  //rank 4
+  template < typename I0, typename I1, typename I2, typename I3 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
+  {
+    return i0 + m_dim.N0 * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * i3 ));
+  }
+
+  //rank 5
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4 ) const
+  {
+    return i0 + m_dim.N0 * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * (
+           i3 + m_dim.N3 * i4 )));
+  }
+
+  //rank 6
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5 ) const
+  {
+    return i0 + m_dim.N0 * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * (
+           i3 + m_dim.N3 * (
+           i4 + m_dim.N4 * i5 ))));
+  }
+
+  //rank 7
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
+  {
+    return i0 + m_dim.N0 * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * (
+           i3 + m_dim.N3 * (
+           i4 + m_dim.N4 * (
+           i5 + m_dim.N5 * i6 )))));
+  }
+
+  //rank 8
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6, typename I7 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
+  {
+    return i0 + m_dim.N0 * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * (
+           i3 + m_dim.N3 * (
+           i4 + m_dim.N4 * (
+           i5 + m_dim.N5 * (
+           i6 + m_dim.N6 * i7 ))))));
+  }
+
+  //----------------------------------------
+  // Rebuild a LayoutLeft object from the eight stored extents.
+  KOKKOS_INLINE_FUNCTION
+  constexpr array_layout layout() const
+    {
+      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
+                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
+    }
+  // Per-dimension extents.
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
+
+  /* Cardinality of the domain index space: product of all eight stored extents */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type size() const
+    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
+
+  /* Span of the range space: the same product as size(), since there is no padding */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type span() const
+    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
+
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; }
+
+  /* Strides of dimensions: stride_k is the product of extents N0 .. N(k-1) */
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N0 * m_dim.N1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; }
+  // Write strides into s[0 .. rank] (rank + 1 entries are written); s[k] is the running product of extents N0 .. N(k-1).
+  // Stride with [ rank ] value is the total length
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION
+  void stride( iType * const s ) const
+    {
+      s[0] = 1 ;
+      if ( 0 < dimension_type::rank ) { s[1] = m_dim.N0 ; }
+      if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; }
+      if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; }
+      if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; }
+      if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; }
+      if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; }
+      if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; }
+      if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; }
+    }
+
+  //----------------------------------------
+  // Default construction, copy construction and copy assignment are compiler generated.
+  ViewOffset() = default ;
+  ViewOffset( const ViewOffset & ) = default ;
+  ViewOffset & operator = ( const ViewOffset & ) = default ;
+  // Construct from a layout: only dimension[0] is read (the rest are passed as 0); the scalar-size tag argument is unused.
+  template< unsigned TrivialScalarSize >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset
+    ( std::integral_constant<unsigned,TrivialScalarSize> const &
+    , Kokkos::LayoutLeft const & arg_layout
+    )
+    : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 )
+    {}
+  // Converting copy from another LayoutLeft offset; equal rank is enforced at compile time.
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
+    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
+           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
+    {
+      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
+      // Also requires equal static dimensions ...
+    } 
+  // Converting copy from LayoutRight: accepted only when both sides are rank 1 and this side has one dynamic extent.
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
+    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
+    {
+      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
+                   , "ViewOffset LayoutLeft and LayoutRight are only compatible when rank == 1" );
+    }
+  // Converting copy from LayoutStride: rank 1 only, and aborts at run time unless the source stride S0 equals 1.
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
+    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
+    {
+      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
+                   , "ViewOffset LayoutLeft and LayoutStride are only compatible when rank == 1" );
+      if ( rhs.m_stride.S0 != 1 ) {
+        Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft from LayoutStride  requires stride == 1" );
+      }
+    }
+
+  //----------------------------------------
+  // Subview construction
+  // Only sub.range_extent(0) is used to set the extent; rhs is not read by this constructor.
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset(
+    const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
+    const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
+    : m_dim( sub.range_extent(0), 0, 0, 0, 0, 0, 0, 0 )
+    {
+      static_assert( ( 0 == dimension_type::rank ) ||
+                     ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank )
+                   , "ViewOffset subview construction requires compatible rank" );
+    }
+};
+
+//----------------------------------------------------------------------------
+// ViewOffset mapping for LayoutLeft AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding
+template < class Dimension >
+struct ViewOffset< Dimension , Kokkos::LayoutLeft
+                 , typename std::enable_if<( 1 < Dimension::rank
+                                             &&
+                                             0 < Dimension::rank_dynamic
+                                           )>::type >
+{
+  using is_mapping_plugin = std::true_type ;
+  using is_regular        = std::true_type ;
+
+  typedef size_t             size_type ;
+  typedef Dimension          dimension_type ;
+  typedef Kokkos::LayoutLeft array_layout ;
+  // m_dim holds the extents; m_stride is the multiplier applied to index i1 and replaces N0 in every offset formula.
+  dimension_type m_dim ;
+  size_type      m_stride ;
+
+  //----------------------------------------
+  // Map a multi-index to a linear offset: i0 has unit stride, i1 is scaled by m_stride, later indices also by N1, N2, ...
+  // rank 1
+  template< typename I0 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 ) const { return i0 ; }
+
+  // rank 2
+  template < typename I0 , typename I1 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 , I1 const & i1 ) const
+    { return i0 + m_stride * i1 ; }
+
+  //rank 3
+  template < typename I0, typename I1, typename I2 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
+  {
+    return i0 + m_stride * ( i1 + m_dim.N1 * i2 );
+  }
+
+  //rank 4
+  template < typename I0, typename I1, typename I2, typename I3 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
+  {
+    return i0 + m_stride * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * i3 ));
+  }
+
+  //rank 5
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4 ) const
+  {
+    return i0 + m_stride * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * (
+           i3 + m_dim.N3 * i4 )));
+  }
+
+  //rank 6
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5 ) const
+  {
+    return i0 + m_stride * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * (
+           i3 + m_dim.N3 * (
+           i4 + m_dim.N4 * i5 ))));
+  }
+
+  //rank 7
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
+  {
+    return i0 + m_stride * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * (
+           i3 + m_dim.N3 * (
+           i4 + m_dim.N4 * (
+           i5 + m_dim.N5 * i6 )))));
+  }
+
+  //rank 8
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6, typename I7 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
+  {
+    return i0 + m_stride * (
+           i1 + m_dim.N1 * (
+           i2 + m_dim.N2 * (
+           i3 + m_dim.N3 * (
+           i4 + m_dim.N4 * (
+           i5 + m_dim.N5 * (
+           i6 + m_dim.N6 * i7 ))))));
+  }
+
+  //----------------------------------------
+  // Rebuild a LayoutLeft object from the eight stored extents; m_stride is not passed to it.
+  KOKKOS_INLINE_FUNCTION
+  constexpr array_layout layout() const
+    {
+      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
+                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
+    }
+  // Per-dimension extents.
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
+
+  /* Cardinality of the domain index space: product of all eight stored extents */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type size() const
+    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
+
+  /* Span of the range space: uses m_stride in place of N0, so any padding is counted */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type span() const
+    { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
+  // Contiguous exactly when the stored stride equals the leading extent N0.
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_stride == m_dim.N0 ; }
+
+  /* Strides of dimensions: stride_k ( k >= 1 ) is m_stride times the product of extents N1 .. N(k-1) */
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride * m_dim.N1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride * m_dim.N1 * m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; }
+  // Write strides into s[0 .. rank] (rank + 1 entries are written), starting from s[0] = 1 and s[1] = m_stride.
+  // Stride with [ rank ] value is the total length
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION
+  void stride( iType * const s ) const
+    {
+      s[0] = 1 ;
+      if ( 0 < dimension_type::rank ) { s[1] = m_stride ; }
+      if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; }
+      if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; }
+      if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; }
+      if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; }
+      if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; }
+      if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; }
+      if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; }
+    }
+
+  //----------------------------------------
+
+private:
+  // Compile-time helper that pads a leading extent; TrivialScalarSize is presumably the scalar size in bytes - TODO confirm.
+  template< unsigned TrivialScalarSize >
+  struct Padding {
+    enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
+    enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
+    // div / mod above: quotient and remainder of MEMORY_ALIGNMENT by the scalar size; both are 0 when the size is 0.
+    // If memory alignment is a multiple of the trivial scalar size then attempt to align.
+    enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 };
+    enum { div_ok = div ? div : 1 }; // To avoid modulo by zero in constexpr
+    // Round N up to the next multiple of align, but only if align != 0, N > THRESHOLD * align and N is not already a multiple.
+    KOKKOS_INLINE_FUNCTION
+    static constexpr size_t stride( size_t const N )
+      {
+        return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) )
+               ? N + align - ( N % div_ok ) : N ;
+      }
+  };
+
+public:
+  // Default construction, copy construction and copy assignment are compiler generated.
+  ViewOffset() = default ;
+  ViewOffset( const ViewOffset & ) = default ;
+  ViewOffset & operator = ( const ViewOffset & ) = default ;
+
+  /* Construct from a layout: all eight extents are copied and m_stride is dimension[0] after Padding; padding is enabled for trivial scalar types with non-zero trivial scalar size */
+  template< unsigned TrivialScalarSize >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset
+    ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
+    , Kokkos::LayoutLeft const & arg_layout
+    )
+    : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1]
+           , arg_layout.dimension[2] , arg_layout.dimension[3]
+           , arg_layout.dimension[4] , arg_layout.dimension[5]
+           , arg_layout.dimension[6] , arg_layout.dimension[7]
+           )
+    , m_stride( Padding<TrivialScalarSize>::stride( arg_layout.dimension[0] ) )
+    {}
+  // Converting copy from another LayoutLeft offset of equal rank; the stride is taken from rhs.stride_1().
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
+    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
+           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
+    , m_stride( rhs.stride_1() )
+    {
+      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
+      // Also requires equal static dimensions ...
+    } 
+
+  //----------------------------------------
+  // Subview construction
+  // This subview must be 2 == rank and 2 == rank_dynamic
+  // due to only having stride #0.
+  // The source dimension #0 must be non-zero for stride-one leading dimension.
+  // At most one subsequent dimension can be non-zero.
+  // m_stride is the source stride of the source dimension named by sub.range_index(1), or 0 if that index is not in 1..7.
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset
+    ( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
+      const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
+    : m_dim( sub.range_extent(0)
+           , sub.range_extent(1)
+           , sub.range_extent(2)
+           , sub.range_extent(3)
+           , sub.range_extent(4)
+           , sub.range_extent(5)
+           , sub.range_extent(6)
+           , sub.range_extent(7))
+    , m_stride( ( 1 == sub.range_index(1) ? rhs.stride_1() :
+                ( 2 == sub.range_index(1) ? rhs.stride_2() :
+                ( 3 == sub.range_index(1) ? rhs.stride_3() :
+                ( 4 == sub.range_index(1) ? rhs.stride_4() :
+                ( 5 == sub.range_index(1) ? rhs.stride_5() :
+                ( 6 == sub.range_index(1) ? rhs.stride_6() :
+                ( 7 == sub.range_index(1) ? rhs.stride_7() : 0 ))))))))
+    {
+      //static_assert( ( 2 == dimension_type::rank ) &&
+      //               ( 2 == dimension_type::rank_dynamic ) &&
+      //               ( 2 <= DimRHS::rank )
+      //             , "ViewOffset subview construction requires compatible rank" );
+    }
+};
+
+//----------------------------------------------------------------------------
+// ViewOffset mapping for LayoutRight AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding
+template < class Dimension >
+struct ViewOffset< Dimension , Kokkos::LayoutRight
+                 , typename std::enable_if<( 1 >= Dimension::rank
+                                             ||
+                                             0 == Dimension::rank_dynamic
+                                           )>::type >
+{
+  using is_mapping_plugin = std::true_type ;
+  using is_regular        = std::true_type ;
+
+  typedef size_t              size_type ;
+  typedef Dimension           dimension_type ;
+  typedef Kokkos::LayoutRight array_layout ;
+  // Extents N0..N7 are the only state; this specialization has no separate stride member.
+  dimension_type m_dim ;
+
+  //----------------------------------------
+  // Map a multi-index to a linear offset: the last index has unit stride and i0 is the innermost (most scaled) term.
+  // rank 1
+  template< typename I0 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 ) const { return i0 ; }
+
+  // rank 2
+  template < typename I0 , typename I1 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 , I1 const & i1 ) const
+    { return i1 + m_dim.N1 * i0 ; }
+
+  //rank 3
+  template < typename I0, typename I1, typename I2 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
+  {
+    return i2 + m_dim.N2 * ( i1 + m_dim.N1 * ( i0 ));
+  }
+
+  //rank 4
+  template < typename I0, typename I1, typename I2, typename I3 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
+  {
+    return i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * (
+           i1 + m_dim.N1 * ( i0 )));
+  }
+
+  //rank 5
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4 ) const
+  {
+    return i4 + m_dim.N4 * (
+           i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * (
+           i1 + m_dim.N1 * ( i0 ))));
+  }
+
+  //rank 6
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5 ) const
+  {
+    return i5 + m_dim.N5 * (
+           i4 + m_dim.N4 * (
+           i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * (
+           i1 + m_dim.N1 * ( i0 )))));
+  }
+
+  //rank 7
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
+  {
+    return i6 + m_dim.N6 * (
+           i5 + m_dim.N5 * (
+           i4 + m_dim.N4 * (
+           i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * (
+           i1 + m_dim.N1 * ( i0 ))))));
+  }
+
+  //rank 8
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6, typename I7 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
+  {
+    return i7 + m_dim.N7 * (
+           i6 + m_dim.N6 * (
+           i5 + m_dim.N5 * (
+           i4 + m_dim.N4 * (
+           i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * (
+           i1 + m_dim.N1 * ( i0 )))))));
+  }
+
+  //----------------------------------------
+  // Rebuild a LayoutRight object from the eight stored extents.
+  KOKKOS_INLINE_FUNCTION
+  constexpr array_layout layout() const
+    {
+      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
+                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
+    }
+  // Per-dimension extents.
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
+
+  /* Cardinality of the domain index space: product of all eight stored extents */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type size() const
+    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
+
+  /* Span of the range space: the same product as size(), since there is no padding */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type span() const
+    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
+
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; }
+
+  /* Strides of dimensions: stride_k is the product of extents N(k+1) .. N7 */
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; }
+  // Write strides into s[0 .. rank] (rank + 1 entries are written), accumulating n from the last dimension toward the first.
+  // Stride with [ rank ] value is the total length
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION
+  void stride( iType * const s ) const
+    {
+      size_type n = 1 ;
+      if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; }
+      if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; }
+      if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; }
+      if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; }
+      if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; }
+      if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; }
+      if ( 1 < dimension_type::rank ) { s[1] = n ; n *= m_dim.N1 ; }
+      if ( 0 < dimension_type::rank ) { s[0] = n ; }
+      s[dimension_type::rank] = n * m_dim.N0 ;
+    }
+
+  //----------------------------------------
+  // Default construction, copy construction and copy assignment are compiler generated.
+  ViewOffset() = default ;
+  ViewOffset( const ViewOffset & ) = default ;
+  ViewOffset & operator = ( const ViewOffset & ) = default ;
+  // Construct from a layout: only dimension[0] is read (the rest are passed as 0); the scalar-size tag argument is unused.
+  template< unsigned TrivialScalarSize >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset
+    ( std::integral_constant<unsigned,TrivialScalarSize> const &
+    , Kokkos::LayoutRight const & arg_layout
+    )
+    : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 )
+    {}
+  // Converting copy from another LayoutRight offset; equal rank is enforced at compile time.
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
+    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
+           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
+    {
+      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
+      // Also requires equal static dimensions ...
+    } 
+  // Converting copy from LayoutLeft: accepted only when both sides are rank 1 and this side has one dynamic extent.
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
+    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
+    {
+      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
+                   , "ViewOffset LayoutRight and LayoutLeft are only compatible when rank == 1" );
+    }
+  // Converting copy from LayoutStride: rank 1 only, and aborts at run time unless the source stride S0 equals 1.
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
+    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
+    {
+      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
+                   , "ViewOffset LayoutLeft/Right and LayoutStride are only compatible when rank == 1" );
+      if ( rhs.m_stride.S0 != 1 ) {
+        Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft/Right from LayoutStride  requires stride == 1" );
+      }
+    }
+
+  //----------------------------------------
+  // Subview construction
+  // Only sub.range_extent(0) is used to set the extent; rhs is not read by this constructor.
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset
+    ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
+    , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
+    )
+    : m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 )
+    {
+      static_assert( ( 0 == dimension_type::rank_dynamic ) ||
+                     ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank )
+                   , "ViewOffset subview construction requires compatible rank" );
+    }
+};
+
+//----------------------------------------------------------------------------
+// LayoutRight AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding
+template < class Dimension >
+struct ViewOffset< Dimension , Kokkos::LayoutRight
+                 , typename std::enable_if<( 1 < Dimension::rank
+                                             &&
+                                             0 < Dimension::rank_dynamic
+                                           )>::type >
+{
+  using is_mapping_plugin = std::true_type ;
+  using is_regular        = std::true_type ;
+
+  typedef size_t               size_type ;
+  typedef Dimension            dimension_type ;
+  typedef Kokkos::LayoutRight  array_layout ;
+
+  dimension_type m_dim ;
+  size_type      m_stride ;
+
+  //----------------------------------------
+
+  // rank 1
+  template< typename I0 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 ) const { return i0 ; }
+
+  // rank 2
+  template < typename I0 , typename I1 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 , I1 const & i1 ) const
+  { return i1 + i0 * m_stride ; }
+
+  //rank 3
+  template < typename I0, typename I1, typename I2 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
+  { return i2 + m_dim.N2 * ( i1 ) + i0 * m_stride ; }
+
+  //rank 4
+  template < typename I0, typename I1, typename I2, typename I3 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
+  {
+    return i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * ( i1 )) +
+           i0 * m_stride ;
+  }
+
+  //rank 5
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4 ) const
+  {
+    return i4 + m_dim.N4 * (
+           i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * ( i1 ))) +
+           i0 * m_stride ;
+  }
+
+  //rank 6
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5 ) const
+  {
+    return i5 + m_dim.N5 * (
+           i4 + m_dim.N4 * (
+           i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * ( i1 )))) +
+           i0 * m_stride ;
+  }
+
+  //rank 7
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
+  {
+    return i6 + m_dim.N6 * (
+           i5 + m_dim.N5 * (
+           i4 + m_dim.N4 * (
+           i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * ( i1 ))))) +
+           i0 * m_stride ;
+  }
+
+  //rank 8
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6, typename I7 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
+  {
+    return i7 + m_dim.N7 * (
+           i6 + m_dim.N6 * (
+           i5 + m_dim.N5 * (
+           i4 + m_dim.N4 * (
+           i3 + m_dim.N3 * (
+           i2 + m_dim.N2 * ( i1 )))))) +
+           i0 * m_stride ;
+  }
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr array_layout layout() const // Rebuilds an array_layout from all eight stored extents N0..N7
+    {
+      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
+                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
+    }
+
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } // dimension_k() returns the extent m_dim.Nk
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
+
+  /* Cardinality of the domain index space: the product of all eight extents N0..N7 */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type size() const
+    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
+
+  /* Span of the range space: leading extent N0 times the stored leading stride m_stride */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type span() const
+    { return m_dim.N0 * m_stride ; }
+
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const // True when m_stride equals the product of extents N1..N7
+    { return m_stride == m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; }
+
+  /* Strides of dimensions: for k >= 1, stride_k is the product of extents N(k+1)..N7; stride_0 is the stored m_stride */
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride ; }
+
+  // Writes per-dimension strides into s[0..rank-1]; s[rank] receives the total length (m_stride * N0), so s needs rank+1 entries
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION
+  void stride( iType * const s ) const
+    {
+      size_type n = 1 ; // running product of the trailing extents, built from the last dimension backwards
+      if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; }
+      if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; }
+      if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; }
+      if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; }
+      if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; }
+      if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; }
+      if ( 1 < dimension_type::rank ) { s[1] = n ; }
+      if ( 0 < dimension_type::rank ) { s[0] = m_stride ; } // leading stride is the stored value, not the running product
+      s[dimension_type::rank] = m_stride * m_dim.N0 ;
+    }
+
+  //----------------------------------------
+
+private:
+
+  template< unsigned TrivialScalarSize >
+  struct Padding { // Leading-stride padding policy for a given scalar size; TrivialScalarSize == 0 makes align 0, which disables padding
+    enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
+    enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
+
+    // If memory alignment is a multiple of the trivial scalar size then attempt to align.
+    enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 };
+    enum { div_ok = div ? div : 1 }; // Avoids modulo by zero in the constexpr expression below when div == 0
+
+    KOKKOS_INLINE_FUNCTION
+    static constexpr size_t stride( size_t const N ) // Returns N rounded up to a multiple of align, or N unchanged when padding does not apply
+    {
+      return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) ) // pad only when N exceeds the threshold and is not already a multiple
+             ? N + align - ( N % div_ok ) : N ; // align == div_ok whenever align != 0, so this is the next multiple of align
+    }
+  };
+
+public:
+
+  ViewOffset() = default ; // Default construction, copy construction and copy assignment are all compiler-generated
+  ViewOffset( const ViewOffset & ) = default ;
+  ViewOffset & operator = ( const ViewOffset & ) = default ;
+
+  /* Construct from a LayoutRight: copies its eight extents and pads the leading stride when TrivialScalarSize is non-zero.  */
+  template< unsigned TrivialScalarSize >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset
+    ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size // tag argument: only its type is used, the value is never read
+    , Kokkos::LayoutRight const & arg_layout
+    )
+    : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1]
+           , arg_layout.dimension[2] , arg_layout.dimension[3]
+           , arg_layout.dimension[4] , arg_layout.dimension[5]
+           , arg_layout.dimension[6] , arg_layout.dimension[7]
+           )
+    , m_stride( Padding<TrivialScalarSize>::
+                  stride( /* 2 <= rank: each rank test ends the product at the last extent in use */
+                          m_dim.N1 * ( dimension_type::rank == 2 ? 1 :
+                          m_dim.N2 * ( dimension_type::rank == 3 ? 1 :
+                          m_dim.N3 * ( dimension_type::rank == 4 ? 1 :
+                          m_dim.N4 * ( dimension_type::rank == 5 ? 1 :
+                          m_dim.N5 * ( dimension_type::rank == 6 ? 1 :
+                          m_dim.N6 * ( dimension_type::rank == 7 ? 1 : m_dim.N7 )))))) ))
+    {}
+
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs ) // Converts from a LayoutRight offset with a different dimension type
+    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
+           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
+    , m_stride( rhs.stride_0() ) // keeps the source leading stride as-is rather than recomputing it from the extents
+    {
+      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
+      // Also requires equal static dimensions; that condition is not checked here.
+    } 
+
+  //----------------------------------------
+  // Subview construction: extents come from 'sub'; only a single leading stride is stored.
+  // Last dimension must be non-zero
+
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset
+    ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
+    , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
+    )
+    : m_dim( sub.range_extent(0)
+           , sub.range_extent(1)
+           , sub.range_extent(2)
+           , sub.range_extent(3)
+           , sub.range_extent(4)
+           , sub.range_extent(5)
+           , sub.range_extent(6)
+           , sub.range_extent(7))
+    , m_stride( 0 == sub.range_index(0) ? rhs.stride_0() : ( // leading stride = source stride selected by range_index(0)
+                1 == sub.range_index(0) ? rhs.stride_1() : (
+                2 == sub.range_index(0) ? rhs.stride_2() : (
+                3 == sub.range_index(0) ? rhs.stride_3() : (
+                4 == sub.range_index(0) ? rhs.stride_4() : (
+                5 == sub.range_index(0) ? rhs.stride_5() : (
+                6 == sub.range_index(0) ? rhs.stride_6() : 0 ))))))) // any other index value yields a leading stride of 0
+    {
+/*      // (disabled check, never compiled) This subview must be 2 == rank and 2 == rank_dynamic
+      // due to only having stride #0.
+      // The source dimension #0 must be non-zero for stride-one leading dimension.
+      // At most subsequent dimension can be non-zero.
+
+      static_assert( (( 2 == dimension_type::rank ) &&
+                      ( 2 <= DimRHS::rank )) ||
+                     ()
+                   , "ViewOffset subview construction requires compatible rank" );
+*/
+    }
+};
+
+//----------------------------------------------------------------------------
+/* Strided array layout only makes sense for 0 < rank */
+/* rank = 0 included for DynRankView case */
+
+template< unsigned Rank >
+struct ViewStride ; // Primary template is declared only; specializations for Rank 0..8 follow
+
+template<>
+struct ViewStride<0> { // Rank 0: no stride is stored; S0..S7 are all compile-time zeros
+  enum { S0 = 0 , S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
+
+  ViewStride() = default ;
+  ViewStride( const ViewStride & ) = default ;
+  ViewStride & operator = ( const ViewStride & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewStride( size_t , size_t , size_t , size_t // takes eight strides like every other rank and ignores them all
+                      , size_t , size_t , size_t , size_t )
+    {}
+};
+
+template<>
+struct ViewStride<1> { // Rank 1: S0 is a runtime member; S1..S7 are compile-time zeros
+  size_t S0 ;
+  enum { S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
+
+  ViewStride() = default ;
+  ViewStride( const ViewStride & ) = default ;
+  ViewStride & operator = ( const ViewStride & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewStride( size_t aS0 , size_t , size_t , size_t // only the first argument is stored; the rest are ignored
+                      , size_t , size_t , size_t , size_t )
+    : S0( aS0 )
+    {}
+};
+
+template<>
+struct ViewStride<2> { // Rank 2: S0..S1 are runtime members; S2..S7 are compile-time zeros
+  size_t S0 , S1 ;
+  enum { S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
+
+  ViewStride() = default ;
+  ViewStride( const ViewStride & ) = default ;
+  ViewStride & operator = ( const ViewStride & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t , size_t // only the first two arguments are stored; the rest are ignored
+                      , size_t , size_t , size_t , size_t )
+    : S0( aS0 ) , S1( aS1 )
+    {}
+};
+
+template<>
+struct ViewStride<3> { // Rank 3: S0..S2 are runtime members; S3..S7 are compile-time zeros
+  size_t S0 , S1 , S2 ;
+  enum { S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
+
+  ViewStride() = default ;
+  ViewStride( const ViewStride & ) = default ;
+  ViewStride & operator = ( const ViewStride & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t // only the first three arguments are stored; the rest are ignored
+                      , size_t , size_t , size_t , size_t )
+    : S0( aS0 ) , S1( aS1 ) , S2( aS2 )
+    {}
+};
+
+template<>
+struct ViewStride<4> { // Rank 4: S0..S3 are runtime members; S4..S7 are compile-time zeros
+  size_t S0 , S1 , S2 , S3 ;
+  enum { S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
+
+  ViewStride() = default ;
+  ViewStride( const ViewStride & ) = default ;
+  ViewStride & operator = ( const ViewStride & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 // only the first four arguments are stored; the rest are ignored
+                      , size_t , size_t , size_t , size_t )
+    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
+    {}
+};
+
+template<>
+struct ViewStride<5> { // Rank 5: S0..S4 are runtime members; S5..S7 are compile-time zeros
+  size_t S0 , S1 , S2 , S3 , S4 ;
+  enum { S5 = 0 , S6 = 0 , S7 = 0 };
+
+  ViewStride() = default ;
+  ViewStride( const ViewStride & ) = default ;
+  ViewStride & operator = ( const ViewStride & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 // only the first five arguments are stored; the rest are ignored
+                      , size_t aS4 , size_t , size_t , size_t )
+    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
+    , S4( aS4 )
+    {}
+};
+
+template<>
+struct ViewStride<6> { // Rank 6: S0..S5 are runtime members; S6..S7 are compile-time zeros
+  size_t S0 , S1 , S2 , S3 , S4 , S5 ;
+  enum { S6 = 0 , S7 = 0 };
+
+  ViewStride() = default ;
+  ViewStride( const ViewStride & ) = default ;
+  ViewStride & operator = ( const ViewStride & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 // only the first six arguments are stored; the rest are ignored
+                      , size_t aS4 , size_t aS5 , size_t , size_t )
+    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
+    , S4( aS4 ) , S5( aS5 )
+    {}
+};
+
+template<>
+struct ViewStride<7> { // Rank 7: S0..S6 are runtime members; S7 is a compile-time zero
+  size_t S0 , S1 , S2 , S3 , S4 , S5 , S6 ;
+  enum { S7 = 0 };
+
+  ViewStride() = default ;
+  ViewStride( const ViewStride & ) = default ;
+  ViewStride & operator = ( const ViewStride & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 // only the first seven arguments are stored; the last is ignored
+                      , size_t aS4 , size_t aS5 , size_t aS6 , size_t )
+    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
+    , S4( aS4 ) , S5( aS5 ) , S6( aS6 )
+    {}
+};
+
+template<>
+struct ViewStride<8> { // Rank 8: all eight strides S0..S7 are runtime members
+  size_t S0 , S1 , S2 , S3 , S4 , S5 , S6 , S7 ;
+
+  ViewStride() = default ;
+  ViewStride( const ViewStride & ) = default ;
+  ViewStride & operator = ( const ViewStride & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 // every argument is stored
+                      , size_t aS4 , size_t aS5 , size_t aS6 , size_t aS7 )
+    : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
+    , S4( aS4 ) , S5( aS5 ) , S6( aS6 ) , S7( aS7 )
+    {}
+};
+
+template < class Dimension >
+struct ViewOffset< Dimension , Kokkos::LayoutStride
+                 , void >
+{ // Offset mapping for LayoutStride: every dimension carries its own independent stride
+private:
+  typedef ViewStride< Dimension::rank >  stride_type ;
+public:
+
+  using is_mapping_plugin = std::true_type ;
+  using is_regular        = std::true_type ;
+
+  typedef size_t                size_type ;
+  typedef Dimension             dimension_type ;
+  typedef Kokkos::LayoutStride  array_layout ;
+
+  dimension_type  m_dim ;    // extents N0..N7
+  stride_type     m_stride ; // strides S0..S7 (those at or above the rank are compile-time zeros)
+
+  //----------------------------------------
+
+  // rank 1: offset is the index times its stride
+  template< typename I0 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 ) const
+  {
+    return i0 * m_stride.S0 ;
+  }
+
+  // rank 2: offset is the sum of index * stride over both dimensions
+  template < typename I0 , typename I1 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0 , I1 const & i1 ) const
+  {
+    return i0 * m_stride.S0 +
+           i1 * m_stride.S1 ;
+  }
+
+  // rank 3: offset is the sum of index * stride over three dimensions
+  template < typename I0, typename I1, typename I2 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
+  {
+    return i0 * m_stride.S0 +
+           i1 * m_stride.S1 +
+           i2 * m_stride.S2 ;
+  }
+
+  // rank 4: offset is the sum of index * stride over four dimensions
+  template < typename I0, typename I1, typename I2, typename I3 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
+  {
+    return i0 * m_stride.S0 +
+           i1 * m_stride.S1 +
+           i2 * m_stride.S2 +
+           i3 * m_stride.S3 ;
+  }
+
+  // rank 5: offset is the sum of index * stride over five dimensions
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4 ) const
+  {
+    return i0 * m_stride.S0 +
+           i1 * m_stride.S1 +
+           i2 * m_stride.S2 +
+           i3 * m_stride.S3 +
+           i4 * m_stride.S4 ;
+  }
+
+  // rank 6: offset is the sum of index * stride over six dimensions
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5 ) const
+  {
+    return i0 * m_stride.S0 +
+           i1 * m_stride.S1 +
+           i2 * m_stride.S2 +
+           i3 * m_stride.S3 +
+           i4 * m_stride.S4 +
+           i5 * m_stride.S5 ;
+  }
+
+  // rank 7: offset is the sum of index * stride over seven dimensions
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
+  {
+    return i0 * m_stride.S0 +
+           i1 * m_stride.S1 +
+           i2 * m_stride.S2 +
+           i3 * m_stride.S3 +
+           i4 * m_stride.S4 +
+           i5 * m_stride.S5 +
+           i6 * m_stride.S6 ;
+  }
+
+  // rank 8: offset is the sum of index * stride over all eight dimensions
+  template < typename I0, typename I1, typename I2, typename I3
+           , typename I4, typename I5, typename I6, typename I7 >
+  KOKKOS_INLINE_FUNCTION constexpr
+  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
+                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
+  {
+    return i0 * m_stride.S0 +
+           i1 * m_stride.S1 +
+           i2 * m_stride.S2 +
+           i3 * m_stride.S3 +
+           i4 * m_stride.S4 +
+           i5 * m_stride.S5 +
+           i6 * m_stride.S6 +
+           i7 * m_stride.S7 ;
+  }
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr array_layout layout() const // Rebuilds a LayoutStride from interleaved (extent, stride) pairs for all eight dimensions
+    {
+      return array_layout( m_dim.N0 , m_stride.S0
+                         , m_dim.N1 , m_stride.S1
+                         , m_dim.N2 , m_stride.S2
+                         , m_dim.N3 , m_stride.S3
+                         , m_dim.N4 , m_stride.S4
+                         , m_dim.N5 , m_stride.S5
+                         , m_dim.N6 , m_stride.S6
+                         , m_dim.N7 , m_stride.S7
+                         );
+    }
+
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } // dimension_k() returns the extent m_dim.Nk
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
+
+  /* Cardinality of the domain index space: the product of all eight extents N0..N7 */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type size() const
+    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
+
+private:
+
+  KOKKOS_INLINE_FUNCTION
+  static constexpr size_type Max( size_type lhs , size_type rhs ) // constexpr maximum of two values, used by span()
+    { return lhs < rhs ? rhs : lhs ; }
+
+public:
+
+  /* Span of the range space: the largest extent * stride product over all eight dimensions */
+  KOKKOS_INLINE_FUNCTION
+  constexpr size_type span() const // NOTE(review): a zero extent makes size() 0 but need not make this maximum 0 - confirm callers expect that
+    {
+      return Max( m_dim.N0 * m_stride.S0 ,
+             Max( m_dim.N1 * m_stride.S1 ,
+             Max( m_dim.N2 * m_stride.S2 ,
+             Max( m_dim.N3 * m_stride.S3 ,
+             Max( m_dim.N4 * m_stride.S4 ,
+             Max( m_dim.N5 * m_stride.S5 ,
+             Max( m_dim.N6 * m_stride.S6 ,
+                  m_dim.N7 * m_stride.S7 )))))));
+    }
+
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return span() == size(); } // contiguous when the span holds exactly size() elements
+
+  /* Strides of dimensions: stride_k returns the stored m_stride.Sk */
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride.S0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride.S1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride.S2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride.S3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride.S4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_stride.S5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride.S6 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride.S7 ; }
+
+  // Writes the stored strides into s[0..rank-1]; s[rank] receives span(), so s needs rank+1 entries
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION
+  void stride( iType * const s ) const
+    {
+      if ( 0 < dimension_type::rank ) { s[0] = m_stride.S0 ; }
+      if ( 1 < dimension_type::rank ) { s[1] = m_stride.S1 ; }
+      if ( 2 < dimension_type::rank ) { s[2] = m_stride.S2 ; }
+      if ( 3 < dimension_type::rank ) { s[3] = m_stride.S3 ; }
+      if ( 4 < dimension_type::rank ) { s[4] = m_stride.S4 ; }
+      if ( 5 < dimension_type::rank ) { s[5] = m_stride.S5 ; }
+      if ( 6 < dimension_type::rank ) { s[6] = m_stride.S6 ; }
+      if ( 7 < dimension_type::rank ) { s[7] = m_stride.S7 ; }
+      s[dimension_type::rank] = span();
+    }
+
+  //----------------------------------------
+
+  ViewOffset() = default ;
+  ViewOffset( const ViewOffset & ) = default ;
+  ViewOffset & operator = ( const ViewOffset & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( std::integral_constant<unsigned,0> const & // tag argument fixed at 0; it is unnamed and never read
+                      , Kokkos::LayoutStride const & rhs )
+    : m_dim( rhs.dimension[0] , rhs.dimension[1] , rhs.dimension[2] , rhs.dimension[3]
+           , rhs.dimension[4] , rhs.dimension[5] , rhs.dimension[6] , rhs.dimension[7] )
+    , m_stride( rhs.stride[0] , rhs.stride[1] , rhs.stride[2] , rhs.stride[3]
+              , rhs.stride[4] , rhs.stride[5] , rhs.stride[6] , rhs.stride[7] )
+    {}
+
+  template< class DimRHS , class LayoutRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs ) // Converts from an offset of any layout by copying its extents and strides
+    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 
+           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
+    , m_stride( rhs.stride_0() , rhs.stride_1() , rhs.stride_2() , rhs.stride_3()
+              , rhs.stride_4() , rhs.stride_5() , rhs.stride_6() , rhs.stride_7() )
+    {
+      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
+      // Also requires equal static dimensions; that condition is not checked here.
+    }
+
+  //----------------------------------------
+  // Subview construction: each sub-dimension inherits the stride of the source dimension it was taken from
+
+private:
+
+  template< class DimRHS , class LayoutRHS >
+  KOKKOS_INLINE_FUNCTION static
+  constexpr size_t stride // Returns rhs.stride_r() for r in 0..7 and 0 for any larger r
+    ( unsigned r , const ViewOffset< DimRHS , LayoutRHS , void > & rhs )
+    {
+      return r >  7 ? 0 : (
+             r == 0 ? rhs.stride_0() : (
+             r == 1 ? rhs.stride_1() : (
+             r == 2 ? rhs.stride_2() : (
+             r == 3 ? rhs.stride_3() : (
+             r == 4 ? rhs.stride_4() : (
+             r == 5 ? rhs.stride_5() : (
+             r == 6 ? rhs.stride_6() : rhs.stride_7() )))))));
+    }
+
+public:
+
+  template< class DimRHS , class LayoutRHS >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset
+    ( const ViewOffset< DimRHS , LayoutRHS , void > & rhs
+    , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
+    )
+    // range_extent(r) returns 0 when dimension_type::rank <= r
+    : m_dim( sub.range_extent(0)
+           , sub.range_extent(1)
+           , sub.range_extent(2)
+           , sub.range_extent(3)
+           , sub.range_extent(4)
+           , sub.range_extent(5)
+           , sub.range_extent(6)
+           , sub.range_extent(7)
+           )
+    // range_index(r) returns ~0u when dimension_type::rank <= r, which the stride() helper maps to a stride of 0
+    , m_stride( stride( sub.range_index(0), rhs )
+              , stride( sub.range_index(1), rhs )
+              , stride( sub.range_index(2), rhs )
+              , stride( sub.range_index(3), rhs )
+              , stride( sub.range_index(4), rhs )
+              , stride( sub.range_index(5), rhs )
+              , stride( sub.range_index(6), rhs )
+              , stride( sub.range_index(7), rhs )
+              )
+    {}
+};
+
+}}} // namespace Kokkos::Experimental::Impl
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+/** \brief  ViewDataHandle provides the type of the 'data handle' which the view
+ *          uses to access data with the [] operator. It also provides
+ *          'assign' functions that build a handle from a raw pointer or from
+ *          an existing handle plus an element offset, a 'track_type' naming
+ *          the shared allocation tracker, and a 'return_type' which is what
+ *          the view operators will give back.
+ *          Specialisation of this object allows three things depending
+ *          on ViewTraits and compiler options:
+ *          (i)   Use special allocator (e.g. huge pages/small pages and pinned memory)
+ *          (ii)  Use special data handle type (e.g. add Cuda Texture Object)
+ *          (iii) Use special access intrinsics (e.g. texture fetch and non-caching loads)
+ */
+template< class Traits , class Enable = void >
+struct ViewDataHandle { // Primary template: the handle is a plain pointer and element access yields a plain reference
+
+  typedef typename Traits::value_type   value_type  ;
+  typedef typename Traits::value_type * handle_type ;
+  typedef typename Traits::value_type & return_type ;
+  typedef Kokkos::Impl::SharedAllocationTracker  track_type  ;
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( value_type * arg_data_ptr // wraps a raw pointer; the tracker argument is unused here
+                           , track_type const & /*arg_tracker*/ )
+  {
+    return handle_type( arg_data_ptr );
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( handle_type const arg_data_ptr // builds a handle displaced by 'offset' elements
+                           , size_t offset )
+  {
+    return handle_type( arg_data_ptr + offset );
+  } 
+};
+
+template< class Traits >
+struct ViewDataHandle< Traits , // Selected when the value type is non-const, 'specialize' is void, and the Atomic memory trait is set
+  typename std::enable_if<( std::is_same< typename Traits::non_const_value_type
+                                        , typename Traits::value_type >::value
+                            &&
+                            std::is_same< typename Traits::specialize , void >::value
+                            &&
+                            Traits::memory_traits::Atomic
+                          )>::type >
+{
+  typedef typename Traits::value_type  value_type ;
+  typedef typename Kokkos::Impl::AtomicViewDataHandle< Traits >  handle_type ;
+  typedef typename Kokkos::Impl::AtomicDataElement< Traits >     return_type ;
+  typedef Kokkos::Impl::SharedAllocationTracker                  track_type  ;
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( value_type * arg_data_ptr // wraps a raw pointer in the atomic handle; the tracker argument is unused here
+                           , track_type const & /*arg_tracker*/ )
+  {
+    return handle_type( arg_data_ptr );
+  }
+
+  template<class SrcHandleType>
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( const SrcHandleType& arg_handle // source handle must expose a 'ptr' member
+                           , size_t offset )
+  {
+    return handle_type( arg_handle.ptr + offset );
+  }
+};
+
+template< class Traits >
+struct ViewDataHandle< Traits , // Selected for Restrict without Aligned or Atomic; with CUDA enabled, CudaSpace and CudaUVMSpace are excluded
+  typename std::enable_if<( 
+                            std::is_same< typename Traits::specialize , void >::value
+                            &&
+                            (!Traits::memory_traits::Aligned)
+                            &&
+                            Traits::memory_traits::Restrict
+#ifdef KOKKOS_HAVE_CUDA
+                            &&
+                            (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
+                                std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
+#endif
+                            &&
+                            (!Traits::memory_traits::Atomic)
+                          )>::type >
+{
+  typedef typename Traits::value_type  value_type ;
+  typedef typename Traits::value_type * KOKKOS_RESTRICT handle_type ; // pointer handle qualified with KOKKOS_RESTRICT
+  typedef typename Traits::value_type & KOKKOS_RESTRICT return_type ;
+  typedef Kokkos::Impl::SharedAllocationTracker  track_type  ;
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( value_type * arg_data_ptr // wraps a raw pointer; the tracker argument is unused here
+                           , track_type const & /*arg_tracker*/ )
+  {
+    return handle_type( arg_data_ptr );
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( handle_type const arg_data_ptr // builds a handle displaced by 'offset' elements
+                           , size_t offset )
+  {
+    return handle_type( arg_data_ptr + offset );
+  }
+};
+
+template< class Traits >
+struct ViewDataHandle< Traits , // Selected for Aligned without Restrict or Atomic; with CUDA enabled, CudaSpace and CudaUVMSpace are excluded
+  typename std::enable_if<( 
+                            std::is_same< typename Traits::specialize , void >::value
+                            &&
+                            Traits::memory_traits::Aligned
+			    &&
+                            (!Traits::memory_traits::Restrict)
+#ifdef KOKKOS_HAVE_CUDA
+                            &&
+                            (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
+                                std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
+#endif
+                            &&
+                            (!Traits::memory_traits::Atomic)
+                          )>::type >
+{
+  typedef typename Traits::value_type  value_type ;
+  typedef typename Traits::value_type * KOKKOS_ALIGN_PTR(KOKKOS_ALIGN_SIZE) handle_type ; // pointer handle annotated with the alignment macro
+  typedef typename Traits::value_type & return_type ;
+  typedef Kokkos::Impl::SharedAllocationTracker  track_type  ;
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( value_type * arg_data_ptr // wraps a raw pointer; the tracker argument is unused here
+                           , track_type const & /*arg_tracker*/ )
+  {
+    if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % KOKKOS_ALIGN_SIZE ) { // abort when the address is not a multiple of KOKKOS_ALIGN_SIZE
+      Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
+    }
+    return handle_type( arg_data_ptr );
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( handle_type const arg_data_ptr // builds a handle displaced by 'offset' elements
+                           , size_t offset )
+  {
+    if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % KOKKOS_ALIGN_SIZE ) { // the displaced address must itself be aligned
+      Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
+    }
+    return handle_type( arg_data_ptr + offset );
+  }
+};
+
+template< class Traits >
+struct ViewDataHandle< Traits , // Selected for Aligned and Restrict together, without Atomic; with CUDA enabled, CudaSpace and CudaUVMSpace are excluded
+  typename std::enable_if<( 
+                            std::is_same< typename Traits::specialize , void >::value
+                            &&
+                            Traits::memory_traits::Aligned
+                            &&
+                            Traits::memory_traits::Restrict
+#ifdef KOKKOS_HAVE_CUDA
+                            &&
+                            (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
+                                std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
+#endif
+                            &&
+                            (!Traits::memory_traits::Atomic)
+                          )>::type >
+{
+  typedef typename Traits::value_type  value_type ;
+  typedef typename Traits::value_type * KOKKOS_RESTRICT KOKKOS_ALIGN_PTR(KOKKOS_ALIGN_SIZE) handle_type ; // pointer handle with both the restrict and alignment macros
+  typedef typename Traits::value_type & return_type ;
+  typedef Kokkos::Impl::SharedAllocationTracker  track_type  ;
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( value_type * arg_data_ptr // wraps a raw pointer; the tracker argument is unused here
+                           , track_type const & /*arg_tracker*/ )
+  {
+    if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % KOKKOS_ALIGN_SIZE ) { // abort when the address is not a multiple of KOKKOS_ALIGN_SIZE
+      Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
+    }
+    return handle_type( arg_data_ptr );
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static handle_type assign( handle_type const arg_data_ptr // builds a handle displaced by 'offset' elements
+                           , size_t offset )
+  {
+    if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % KOKKOS_ALIGN_SIZE ) { // the displaced address must itself be aligned
+      Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
+    }
+    return handle_type( arg_data_ptr + offset );
+  }
+};
+}}} // namespace Kokkos::Experimental::Impl
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+//----------------------------------------------------------------------------
+
+/*
+ *  The construction, assignment to default, and destruction
+ *  are merged into a single functor.
+ *  Primarily to work around an unresolved CUDA back-end bug
+ *  that would lose the destruction cuda device function when
+ *  called from the shared memory tracking destruction.
+ *  Secondarily to have two fewer partial specializations.
+ */
+template< class ExecSpace
+        , class ValueType
+        , bool IsScalar = std::is_scalar< ValueType >::value // selects between the scalar and non-scalar specializations
+        >
+struct ViewValueFunctor ;
+
+template< class ExecSpace , class ValueType >
+struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ >
+{ // Non-scalar values: default-constructs or destroys each of the n elements in place
+  typedef Kokkos::RangePolicy< ExecSpace > PolicyType ;
+
+  ExecSpace   space ;
+  ValueType * ptr ;
+  size_t      n ;
+  bool        destroy ; // true: operator() calls the destructor; false: operator() placement-constructs
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const size_t i ) const
+    {
+      if ( destroy ) { (ptr+i)->~ValueType(); } //KOKKOS_CUDA_CLANG_WORKAROUND this line causes ptax error __cxa_begin_catch in nested_view unit-test
+      else           { new (ptr+i) ValueType(); }
+    }
+
+  ViewValueFunctor() = default ;
+  ViewValueFunctor( const ViewValueFunctor & ) = default ;
+  ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ;
+
+  ViewValueFunctor( ExecSpace   const & arg_space
+                  , ValueType * const arg_ptr
+                  , size_t      const arg_n )
+    : space( arg_space )
+    , ptr( arg_ptr )
+    , n( arg_n )
+    , destroy( false ) // starts in construct mode; execute() overwrites this before running
+    {}
+
+  void execute( bool arg ) // arg becomes the 'destroy' flag, then operator() runs for every i in [0,n)
+    {
+      destroy = arg ;
+      if ( ! space.in_parallel() ) { // in_parallel() is false: dispatch a ParallelFor over [0,n) and fence afterwards
+        const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
+          closure( *this , PolicyType( 0 , n ) );
+        closure.execute();
+        space.fence();
+      }
+      else { // in_parallel() is true: run a plain serial loop in the caller instead
+        for ( size_t i = 0 ; i < n ; ++i ) operator()(i);
+      }
+    }
+
+  void construct_shared_allocation()
+    { execute( false ); }
+
+  void destroy_shared_allocation()
+    { execute( true ); }
+};
+
+
+template< class ExecSpace , class ValueType >
+struct ViewValueFunctor< ExecSpace , ValueType , true /* is_scalar */ >
+{
+  typedef Kokkos::RangePolicy< ExecSpace > PolicyType ;
+
+  ExecSpace   space ; // execution space used to run the initialization loop
+  ValueType * ptr ;   // first of the 'n' values to initialize
+  size_t      n ;     // number of values
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const size_t i ) const
+    { ptr[i] = ValueType(); } // assign a value-initialized ValueType to element i
+
+  ViewValueFunctor() = default ;
+  ViewValueFunctor( const ViewValueFunctor & ) = default ;
+  ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ;
+
+  ViewValueFunctor( ExecSpace   const & arg_space
+                  , ValueType * const arg_ptr
+                  , size_t      const arg_n )
+    : space( arg_space )
+    , ptr( arg_ptr )
+    , n( arg_n )
+    {}
+
+  void construct_shared_allocation()
+    {
+      if ( ! space.in_parallel() ) { // space reports not in_parallel: ParallelFor over [0,n), then fence
+        const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
+          closure( *this , PolicyType( 0 , n ) );
+        closure.execute();
+        space.fence();
+      }
+      else { // otherwise apply operator() to each index with a plain serial loop
+        for ( size_t i = 0 ; i < n ; ++i ) operator()(i);
+      }
+    }
+
+  void destroy_shared_allocation() {} // intentionally empty for the scalar specialization
+};
+
+//----------------------------------------------------------------------------
+/** \brief  View mapping for non-specialized data type and standard layout */
+template< class Traits >
+class ViewMapping< Traits ,
+  typename std::enable_if<(
+    std::is_same< typename Traits::specialize , void >::value
+    &&
+    ViewOffset< typename Traits::dimension
+              , typename Traits::array_layout
+              , void >::is_mapping_plugin::value
+  )>::type >
+{
+private:
+
+  template< class , class ... > friend class ViewMapping ;
+  template< class , class ... > friend class Kokkos::View ;
+
+  typedef ViewOffset< typename Traits::dimension
+                    , typename Traits::array_layout
+                    , void
+                    >  offset_type ;
+
+  typedef typename ViewDataHandle< Traits >::handle_type  handle_type ;
+
+  handle_type  m_handle ; // data handle; indexed with the offsets that m_offset produces
+  offset_type  m_offset ; // maps a multi-index to an offset into m_handle
+
+  KOKKOS_INLINE_FUNCTION
+  ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
+    : m_handle( arg_handle )
+    , m_offset( arg_offset )
+    {}
+
+public:
+
+  //----------------------------------------
+  // Domain dimensions
+
+  enum { Rank = Traits::dimension::rank };
+
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const
+    { return m_offset.m_dim.extent(r); }
+
+  KOKKOS_INLINE_FUNCTION constexpr
+  typename Traits::array_layout layout() const
+    { return m_offset.layout(); }
+
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
+
+  // Is a regular layout with uniform striding for each index.
+  using is_regular = typename offset_type::is_regular ;
+
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
+
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_offset.stride(s); }
+
+  //----------------------------------------
+  // Range span
+
+  /** \brief  Span of the mapped range */
+  KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); }
+
+  /** \brief  Is the mapped range span contiguous */
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); }
+
+  typedef typename ViewDataHandle< Traits >::return_type  reference_type ;
+  typedef typename Traits::value_type *                   pointer_type ;
+
+  /** \brief  If data references are lvalue references then the pointer to memory can be queried; otherwise null is returned */
+  KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
+    {
+      return std::is_lvalue_reference< reference_type >::value
+             ? (pointer_type) m_handle
+             : (pointer_type) 0 ;
+    }
+
+  //----------------------------------------
+  // The View class performs all rank and bounds checking before
+  // calling these element reference methods.
+
+  KOKKOS_FORCEINLINE_FUNCTION
+  reference_type reference() const { return m_handle[0]; }
+
+  template< typename I0 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename
+    std::enable_if< std::is_integral<I0>::value &&
+                    ! std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
+                  , reference_type >::type
+  reference( const I0 & i0 ) const { return m_handle[i0]; } // non-LayoutStride: index used directly
+
+  template< typename I0 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename
+    std::enable_if< std::is_integral<I0>::value &&
+                    std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
+                  , reference_type >::type
+  reference( const I0 & i0 ) const { return m_handle[ m_offset(i0) ]; } // LayoutStride: index mapped through m_offset
+
+  template< typename I0 , typename I1 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  reference_type reference( const I0 & i0 , const I1 & i1 ) const
+    { return m_handle[ m_offset(i0,i1) ]; }
+
+  template< typename I0 , typename I1 , typename I2 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
+    { return m_handle[ m_offset(i0,i1,i2) ]; }
+
+  template< typename I0 , typename I1 , typename I2 , typename I3 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
+    { return m_handle[ m_offset(i0,i1,i2,i3) ]; }
+
+  template< typename I0 , typename I1 , typename I2 , typename I3
+          , typename I4 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+                          , const I4 & i4 ) const
+    { return m_handle[ m_offset(i0,i1,i2,i3,i4) ]; }
+
+  template< typename I0 , typename I1 , typename I2 , typename I3
+          , typename I4 , typename I5 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+                          , const I4 & i4 , const I5 & i5 ) const
+    { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5) ]; }
+
+  template< typename I0 , typename I1 , typename I2 , typename I3
+          , typename I4 , typename I5 , typename I6 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+                          , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
+    { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6) ]; }
+
+  template< typename I0 , typename I1 , typename I2 , typename I3
+          , typename I4 , typename I5 , typename I6 , typename I7 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+                          , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
+    { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; }
+
+  //----------------------------------------
+
+private:
+
+  enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ };
+  enum { MemorySpanSize = sizeof(typename Traits::value_type) };
+
+public:
+
+  /** \brief  Span, in bytes, of the referenced memory (rounded up to a multiple of 8) */
+  KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
+    {
+      return ( m_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask);
+    }
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
+  KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset() {}
+  KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
+    : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
+  KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
+    { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
+
+  KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs ) // copies the members; rhs is not modified
+    : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
+  KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
+    { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
+
+  //----------------------------------------
+
+  /**\brief  Span, in bytes, of the required memory */
+  KOKKOS_INLINE_FUNCTION
+  static constexpr size_t memory_span( typename Traits::array_layout const & arg_layout )
+    {
+      typedef std::integral_constant< unsigned , 0 >  padding ;
+      return ( offset_type( padding(), arg_layout ).span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
+    }
+
+  /**\brief  Wrap a span of memory */
+  template< class ... P >
+  KOKKOS_INLINE_FUNCTION
+  ViewMapping( Kokkos::Impl::ViewCtorProp< P ... > const & arg_prop
+             , typename Traits::array_layout const & arg_layout
+             )
+    : m_handle( ( (Kokkos::Impl::ViewCtorProp<void,pointer_type> const &) arg_prop ).value )
+    , m_offset( std::integral_constant< unsigned , 0 >() , arg_layout )
+    {}
+
+  //----------------------------------------
+  /*  Allocate and construct mapped array.
+   *  Allocate via shared allocation record and
+   *  return that record for allocation tracking.
+   */
+  template< class ... P >
+  Kokkos::Impl::SharedAllocationRecord<> *
+  allocate_shared( Kokkos::Impl::ViewCtorProp< P... > const & arg_prop
+                 , typename Traits::array_layout const & arg_layout )
+  {
+    typedef Kokkos::Impl::ViewCtorProp< P... > alloc_prop ;
+
+    typedef typename alloc_prop::execution_space  execution_space ;
+    typedef typename Traits::memory_space         memory_space ;
+    typedef typename Traits::value_type           value_type ;
+    typedef ViewValueFunctor< execution_space , value_type > functor_type ;
+    typedef Kokkos::Impl::SharedAllocationRecord< memory_space , functor_type > record_type ;
+
+    // Query the mapping for byte-size of allocation.
+    // If padding is allowed then pass in sizeof value type
+    // for padding computation.
+    typedef std::integral_constant
+      < unsigned
+      , alloc_prop::allow_padding ? sizeof(value_type) : 0
+      > padding ;
+
+    m_offset = offset_type( padding(), arg_layout );
+
+    const size_t alloc_size =
+      ( m_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
+
+    // Create shared memory tracking record with allocate memory from the memory space
+    record_type * const record =
+      record_type::allocate( ( (Kokkos::Impl::ViewCtorProp<void,memory_space> const &) arg_prop ).value
+                           , ( (Kokkos::Impl::ViewCtorProp<void,std::string>  const &) arg_prop ).value
+                           , alloc_size );
+
+    //  Only set the pointer and initialize if the allocation is non-zero.
+    //  May be zero if one of the dimensions is zero.
+    if ( alloc_size ) {
+
+      m_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) );
+
+      if ( alloc_prop::initialize ) {
+        // Assume destruction is only required when construction is requested.
+        // The ViewValueFunctor has both value construction and destruction operators.
+        record->m_destroy = functor_type( ( (Kokkos::Impl::ViewCtorProp<void,execution_space> const &) arg_prop).value
+                                        , (value_type *) m_handle
+                                        , m_offset.span()
+                                        );
+
+        // Construct values
+        record->m_destroy.construct_shared_allocation();
+      }
+    }
+
+    return record ;
+  }
+};
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+/** \brief  Assign compatible default mappings */
+
+template< class DstTraits , class SrcTraits >
+class ViewMapping< DstTraits , SrcTraits ,
+  typename std::enable_if<(
+    /* default mappings */
+    std::is_same< typename DstTraits::specialize , void >::value
+    &&
+    std::is_same< typename SrcTraits::specialize , void >::value
+    &&
+    (
+      /* same layout */
+      std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value
+      ||
+      /* known layout */
+      (
+        (
+          std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
+          std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
+          std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
+        )
+        &&
+        (
+          std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
+          std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
+          std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
+        )
+      )
+    )
+  )>::type >
+{
+private:
+
+  enum { is_assignable_space = // NOTE: the closing brace of this enum is inside each preprocessor branch
+#if 1
+   Kokkos::Impl::MemorySpaceAccess
+     < typename DstTraits::memory_space
+     , typename SrcTraits::memory_space >::assignable };
+#else
+   std::is_same< typename DstTraits::memory_space
+               , typename SrcTraits::memory_space >::value };
+#endif
+
+  enum { is_assignable_value_type = // same value type, or destination is the const-qualified source type
+    std::is_same< typename DstTraits::value_type
+                , typename SrcTraits::value_type >::value ||
+    std::is_same< typename DstTraits::value_type
+                , typename SrcTraits::const_value_type >::value };
+
+  enum { is_assignable_dimension =
+    ViewDimensionAssignable< typename DstTraits::dimension
+                           , typename SrcTraits::dimension >::value };
+
+  enum { is_assignable_layout = // same layout, destination LayoutStride, rank 0, or rank 1 with a dynamic extent
+    std::is_same< typename DstTraits::array_layout
+                , typename SrcTraits::array_layout >::value ||
+    std::is_same< typename DstTraits::array_layout
+                , Kokkos::LayoutStride >::value ||
+    ( DstTraits::dimension::rank == 0 ) ||
+    ( DstTraits::dimension::rank == 1 &&
+      DstTraits::dimension::rank_dynamic == 1 )
+    };
+
+public:
+
+  enum { is_assignable = is_assignable_space &&
+                         is_assignable_value_type &&
+                         is_assignable_dimension &&
+                         is_assignable_layout };
+
+  typedef Kokkos::Impl::SharedAllocationTracker  TrackType ;
+  typedef ViewMapping< DstTraits , void >  DstType ;
+  typedef ViewMapping< SrcTraits , void >  SrcType ;
+
+  KOKKOS_INLINE_FUNCTION
+  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
+    {
+      static_assert( is_assignable_space
+                   , "View assignment must have compatible spaces" );
+
+      static_assert( is_assignable_value_type
+                   , "View assignment must have same value type or const = non-const" );
+
+      static_assert( is_assignable_dimension
+                   , "View assignment must have compatible dimensions" );
+
+      static_assert( is_assignable_layout
+                   , "View assignment must have compatible layout or have rank <= 1" );
+
+      typedef typename DstType::offset_type  dst_offset_type ;
+
+      if ( size_t(DstTraits::dimension::rank_dynamic) < size_t(SrcTraits::dimension::rank_dynamic) ) { // dst has fewer dynamic extents: compare src extents with dst_dim::ArgN*
+        typedef typename DstTraits::dimension dst_dim;
+        bool assignable =
+          ( ( 1 > DstTraits::dimension::rank_dynamic && 1 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN0 == src.dimension_0() : true ) &&
+          ( ( 2 > DstTraits::dimension::rank_dynamic && 2 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN1 == src.dimension_1() : true ) &&
+          ( ( 3 > DstTraits::dimension::rank_dynamic && 3 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN2 == src.dimension_2() : true ) &&
+          ( ( 4 > DstTraits::dimension::rank_dynamic && 4 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN3 == src.dimension_3() : true ) &&
+          ( ( 5 > DstTraits::dimension::rank_dynamic && 5 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN4 == src.dimension_4() : true ) &&
+          ( ( 6 > DstTraits::dimension::rank_dynamic && 6 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN5 == src.dimension_5() : true ) &&
+          ( ( 7 > DstTraits::dimension::rank_dynamic && 7 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN6 == src.dimension_6() : true ) &&
+          ( ( 8 > DstTraits::dimension::rank_dynamic && 8 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN7 == src.dimension_7() : true )
+          ;
+        if(!assignable)
+          Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension.");
+      }
+      dst.m_offset = dst_offset_type( src.m_offset );
+      dst.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track );
+    }
+};
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+// Subview mapping.
+// Deduce destination view type from source view traits and subview arguments
+
+template< class SrcTraits , class ... Args >
+struct ViewMapping
+  < typename std::enable_if<(
+      std::is_same< typename SrcTraits::specialize , void >::value
+      &&
+      (
+        std::is_same< typename SrcTraits::array_layout
+                    , Kokkos::LayoutLeft >::value ||
+        std::is_same< typename SrcTraits::array_layout
+                    , Kokkos::LayoutRight >::value ||
+        std::is_same< typename SrcTraits::array_layout
+                    , Kokkos::LayoutStride >::value
+      )
+    )>::type
+  , SrcTraits
+  , Args ... >
+{
+private:
+
+  static_assert( SrcTraits::rank == sizeof...(Args) ,
+    "Subview mapping requires one argument for each dimension of source View" );
+
+  enum // R<k> holds is_integral_extent<k,Args...>::value for subview argument k
+    { RZ = false
+    , R0 = bool(is_integral_extent<0,Args...>::value)
+    , R1 = bool(is_integral_extent<1,Args...>::value)
+    , R2 = bool(is_integral_extent<2,Args...>::value)
+    , R3 = bool(is_integral_extent<3,Args...>::value)
+    , R4 = bool(is_integral_extent<4,Args...>::value)
+    , R5 = bool(is_integral_extent<5,Args...>::value)
+    , R6 = bool(is_integral_extent<6,Args...>::value)
+    , R7 = bool(is_integral_extent<7,Args...>::value)
+    };
+
+  enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+              + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) }; // subview rank = number of true R<k>
+
+  // Whether right-most rank is a range.
+  enum { R0_rev = ( 0 == SrcTraits::rank ? RZ : (
+                    1 == SrcTraits::rank ? R0 : (
+                    2 == SrcTraits::rank ? R1 : (
+                    3 == SrcTraits::rank ? R2 : (
+                    4 == SrcTraits::rank ? R3 : (
+                    5 == SrcTraits::rank ? R4 : (
+                    6 == SrcTraits::rank ? R5 : (
+                    7 == SrcTraits::rank ? R6 : R7 )))))))) };
+
+  // Subview's layout: the source layout when any condition below holds, otherwise LayoutStride
+  typedef typename std::conditional<
+      ( /* Same array layout IF */
+        ( rank == 0 ) /* output rank zero */
+        ||
+        SubviewLegalArgsCompileTime<typename SrcTraits::array_layout, typename SrcTraits::array_layout,
+                                    rank, SrcTraits::rank, 0, Args...>::value 
+        ||
+        // OutputRank 1 or 2, InputLayout Left, Interval 0
+        // because single stride one or second index has a stride.
+        ( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ) //replace with input rank
+        ||
+        // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
+        // because single stride one or second index has a stride.
+        ( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ) //replace input rank
+      ), typename SrcTraits::array_layout , Kokkos::LayoutStride
+      >::type array_layout ;
+
+  typedef typename SrcTraits::value_type  value_type ;
+
+  typedef typename std::conditional< rank == 0 , value_type , // one '*' per subview rank, up to 8
+          typename std::conditional< rank == 1 , value_type * ,
+          typename std::conditional< rank == 2 , value_type ** ,
+          typename std::conditional< rank == 3 , value_type *** ,
+          typename std::conditional< rank == 4 , value_type **** ,
+          typename std::conditional< rank == 5 , value_type ***** ,
+          typename std::conditional< rank == 6 , value_type ****** ,
+          typename std::conditional< rank == 7 , value_type ******* ,
+                                                 value_type ********
+          >::type >::type >::type >::type >::type >::type >::type >::type
+     data_type ;
+
+public:
+
+  typedef Kokkos::ViewTraits
+    < data_type
+    , array_layout 
+    , typename SrcTraits::device_type
+    , typename SrcTraits::memory_traits > traits_type ;
+
+  typedef Kokkos::View
+    < data_type
+    , array_layout 
+    , typename SrcTraits::device_type
+    , typename SrcTraits::memory_traits > type ;
+
+  template< class MemoryTraits >
+  struct apply { // same deduced subview types, but with caller-supplied MemoryTraits
+
+    static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" );
+
+    typedef Kokkos::ViewTraits
+      < data_type 
+      , array_layout
+      , typename SrcTraits::device_type
+      , MemoryTraits > traits_type ;
+
+    typedef Kokkos::View
+      < data_type 
+      , array_layout
+      , typename SrcTraits::device_type
+      , MemoryTraits > type ;
+  };
+
+  // The presumed type is 'ViewMapping< traits_type , void >'
+  // However, a compatible ViewMapping is acceptable.
+  template< class DstTraits >
+  KOKKOS_INLINE_FUNCTION
+  static void assign( ViewMapping< DstTraits , void > & dst
+                    , ViewMapping< SrcTraits , void > const & src
+                    , Args ... args )
+    {
+      static_assert(
+        ViewMapping< DstTraits , traits_type , void >::is_assignable ,
+        "Subview destination type must be compatible with subview derived type" );
+
+      typedef ViewMapping< DstTraits , void >  DstType ;
+
+      typedef typename DstType::offset_type  dst_offset_type ;
+
+      const SubviewExtents< SrcTraits::rank , rank >
+        extents( src.m_offset.m_dim , args... );
+
+      dst.m_offset = dst_offset_type( src.m_offset , extents );
+
+      dst.m_handle = ViewDataHandle< DstTraits >::assign(src.m_handle, // source handle plus the offset computed below
+          src.m_offset( extents.domain_offset(0)
+                      , extents.domain_offset(1)
+                      , extents.domain_offset(2)
+                      , extents.domain_offset(3)
+                      , extents.domain_offset(4)
+                      , extents.domain_offset(5)
+                      , extents.domain_offset(6)
+                      , extents.domain_offset(7)
+          ));
+    }
+};
+
+
+
+//----------------------------------------------------------------------------
+
+}}} // namespace Kokkos::Experimental::Impl
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+template< unsigned , class MapType >
+KOKKOS_INLINE_FUNCTION
+bool view_verify_operator_bounds( const MapType & )
+{ return true ; } // recursion terminator: no indices left to check
+
+// Checks index 'i' against extent R of 'map', then the remaining indices against extents R+1, ...
+template< unsigned R , class MapType , class iType , class ... Args >
+KOKKOS_INLINE_FUNCTION
+bool view_verify_operator_bounds( const MapType & map , const iType & i , Args ... args )
+{
+  // Stop at the first failing index; later indices are then not examined.
+  if ( ! ( size_t(i) < map.extent(R) ) ) {
+    return false ;
+  }
+  return view_verify_operator_bounds<R+1>( map , args ... );
+}
+
+template< unsigned , class MapType >
+inline
+void view_error_operator_bounds( char * , int , const MapType & )
+{} // recursion terminator: no indices left to format
+
+// Appends " <index> < <extent> ," (")" after the last index) for rank R to buf, then recurses on the rest.
+template< unsigned R , class MapType , class iType , class ... Args >
+inline
+void view_error_operator_bounds
+  ( char * buf , int len , const MapType & map , const iType & i , Args ... args )
+{
+  if ( len <= 0 ) return ; // no space left; a negative len would wrap to a huge size_t inside snprintf
+  const int n =
+    snprintf(buf,len," %ld < %ld %c"
+            , static_cast<unsigned long>(i)
+            , static_cast<unsigned long>( map.extent(R) )
+            , ( sizeof...(Args) ? ',' : ')' )
+            );
+  // snprintf returns the untruncated length (or a negative value on error): when n >= len the
+  // output was cut off, and advancing by n would step past the end of the buffer.
+  if ( n < 0 || len <= n ) return ;
+  view_error_operator_bounds<R+1>(buf+n,len-n,map,args...);
+}
+
+// Bounds-check 'args' against 'map'; on failure throw (host build, message names 'label') or abort.
+template< class MapType , class ... Args >
+KOKKOS_INLINE_FUNCTION
+void view_verify_operator_bounds( const char* label , const MapType & map , Args ... args )
+{
+  if ( ! view_verify_operator_bounds<0>( map , args ... ) ) {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    enum { LEN = 1024 };
+    char buffer[ LEN ];
+    const int n = snprintf(buffer,LEN,"View bounds error of view %s (", label); // n >= LEN: prefix was truncated
+    if ( 0 <= n && n < LEN ) view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... );
+    Kokkos::Impl::throw_runtime_exception(std::string(buffer));
+#else
+    Kokkos::abort("View bounds error");
+#endif
+  }
+}
+
+} /* namespace Impl */
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP */
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp
deleted file mode 100644
index 5748e722c0076e9f47a7c538bd4d2b6f7458e9b8..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp
+++ /dev/null
@@ -1,1341 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_VIEWOFFSET_HPP
-#define KOKKOS_VIEWOFFSET_HPP
-
-#include <Kokkos_Pair.hpp>
-#include <Kokkos_Layout.hpp>
-#include <impl/Kokkos_Traits.hpp>
-#include <impl/Kokkos_Shape.hpp>
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos { namespace Impl {
-
-template < class ShapeType , class LayoutType , typename Enable = void >
-struct ViewOffset ;
-
-//----------------------------------------------------------------------------
-// LayoutLeft AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding
-template < class ShapeType >
-struct ViewOffset< ShapeType , LayoutLeft
-                 , typename enable_if<( 1 >= ShapeType::rank
-                                        ||
-                                        0 == ShapeType::rank_dynamic
-                                      )>::type >
-  : public ShapeType
-{
-  typedef size_t     size_type ;
-  typedef ShapeType  shape_type ;
-  typedef LayoutLeft array_layout ;
-
-  enum { has_padding = false };
-
-  template< unsigned R >
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t n )
-    { assign_shape_dimension<R>( *this , n ); }
-
-  // Return whether the subview introduced noncontiguity
-  template< class S , class L >
-  KOKKOS_INLINE_FUNCTION
-  typename Impl::enable_if<( 0 == shape_type::rank &&
-                             Impl::is_same<L,LayoutLeft>::value
-                           ), bool >::type
-  assign_subview( const ViewOffset<S,L,void> &
-                , const size_t n0
-                , const size_t n1
-                , const size_t n2
-                , const size_t n3
-                , const size_t n4
-                , const size_t n5
-                , const size_t n6
-                , const size_t n7
-                )
-    {
-      return false ; // did not introduce noncontiguity
-    }
-
-  // This subview must be 1 == rank and 1 == rank_dynamic.
-  // The source dimension #0 must be non-zero and all other dimensions are zero.
-  // Return whether the subview introduced noncontiguity
-  template< class S , class L >
-  KOKKOS_INLINE_FUNCTION
-  typename Impl::enable_if<( 1 == shape_type::rank &&
-                             1 == shape_type::rank_dynamic &&
-                             1 <= S::rank &&
-                             Impl::is_same<L,LayoutLeft>::value
-                           ), bool >::type
-  assign_subview( const ViewOffset<S,L,void> &
-                , const size_t n0
-                , const size_t n1
-                , const size_t n2
-                , const size_t n3
-                , const size_t n4
-                , const size_t n5
-                , const size_t n6
-                , const size_t n7
-                )
-    {
-      // n1 .. n7 must be zero
-      shape_type::N0 = n0 ;
-      return false ; // did not introduce noncontiguity
-    }
-
-
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3
-             , size_t n4 , size_t n5 , size_t n6 , size_t n7
-             , size_t = 0 )
-    { shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); }
-
-  template< class ShapeRHS >
-  KOKKOS_INLINE_FUNCTION
-  void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs
-             , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic)
-                                  )>::type * = 0 )
-    { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); }
-
-  template< class ShapeRHS >
-  KOKKOS_INLINE_FUNCTION
-  void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs
-             , typename enable_if<( 1 == int(ShapeRHS::rank)
-                                    &&
-                                    1 == int(shape_type::rank)
-                                    &&
-                                    1 == int(shape_type::rank_dynamic)
-                                  )>::type * = 0 )
-    { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); }
-
-  KOKKOS_INLINE_FUNCTION
-  void set_padding() {}
-
-  KOKKOS_INLINE_FUNCTION
-  size_type cardinality() const
-    { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type capacity() const
-    { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; }
-
-  // Stride with [ rank ] value is the total length
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    {
-      s[0] = 1 ;
-      if ( 0 < shape_type::rank ) { s[1] = shape_type::N0 ; }
-      if ( 1 < shape_type::rank ) { s[2] = s[1] * shape_type::N1 ; }
-      if ( 2 < shape_type::rank ) { s[3] = s[2] * shape_type::N2 ; }
-      if ( 3 < shape_type::rank ) { s[4] = s[3] * shape_type::N3 ; }
-      if ( 4 < shape_type::rank ) { s[5] = s[4] * shape_type::N4 ; }
-      if ( 5 < shape_type::rank ) { s[6] = s[5] * shape_type::N5 ; }
-      if ( 6 < shape_type::rank ) { s[7] = s[6] * shape_type::N6 ; }
-      if ( 7 < shape_type::rank ) { s[8] = s[7] * shape_type::N7 ; }
-    }
-
-  KOKKOS_INLINE_FUNCTION size_type stride_0() const { return 1 ; }
-  KOKKOS_INLINE_FUNCTION size_type stride_1() const { return shape_type::N0 ; }
-  KOKKOS_INLINE_FUNCTION size_type stride_2() const { return shape_type::N0 * shape_type::N1 ; }
-  KOKKOS_INLINE_FUNCTION size_type stride_3() const { return shape_type::N0 * shape_type::N1 * shape_type::N2 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_4() const
-    { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_5() const
-    { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_6() const
-    { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_7() const
-    { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 ; }
-
-  // rank 1
-  template< typename I0 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const & i0 ) const { return i0 ; }
-
-  // rank 2
-  template < typename I0 , typename I1 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const & i0 , I1 const & i1 ) const
-    { return i0 + shape_type::N0 * i1 ; }
-
-  //rank 3
-  template <typename I0, typename I1, typename I2>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0
-                      , I1 const& i1
-                      , I2 const& i2
-                      ) const
-    {
-      return i0 + shape_type::N0 * (
-             i1 + shape_type::N1 * i2 );
-    }
-
-  //rank 4
-  template <typename I0, typename I1, typename I2, typename I3>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3 ) const
-    {
-      return i0 + shape_type::N0 * (
-             i1 + shape_type::N1 * (
-             i2 + shape_type::N2 * i3 ));
-    }
-
-  //rank 5
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4 ) const
-    {
-      return i0 + shape_type::N0 * (
-             i1 + shape_type::N1 * (
-             i2 + shape_type::N2 * (
-             i3 + shape_type::N3 * i4 )));
-    }
-
-  //rank 6
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5 ) const
-    {
-      return i0 + shape_type::N0 * (
-             i1 + shape_type::N1 * (
-             i2 + shape_type::N2 * (
-             i3 + shape_type::N3 * (
-             i4 + shape_type::N4 * i5 ))));
-    }
-
-  //rank 7
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6) const
-  {
-    return i0 + shape_type::N0 * (
-           i1 + shape_type::N1 * (
-           i2 + shape_type::N2 * (
-           i3 + shape_type::N3 * (
-           i4 + shape_type::N4 * (
-           i5 + shape_type::N5 * i6 )))));
-  }
-
-  //rank 8
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7) const
-  {
-    return i0 + shape_type::N0 * (
-           i1 + shape_type::N1 * (
-           i2 + shape_type::N2 * (
-           i3 + shape_type::N3 * (
-           i4 + shape_type::N4 * (
-           i5 + shape_type::N5 * (
-           i6 + shape_type::N6 * i7 ))))));
-  }
-};
-
-//----------------------------------------------------------------------------
-// LayoutLeft AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding
-template < class ShapeType >
-struct ViewOffset< ShapeType , LayoutLeft
-                 , typename enable_if<( 1 < ShapeType::rank
-                                        &&
-                                        0 < ShapeType::rank_dynamic
-                                      )>::type >
-  : public ShapeType
-{
-  typedef size_t     size_type ;
-  typedef ShapeType  shape_type ;
-  typedef LayoutLeft array_layout ;
-
-  enum { has_padding = true };
-
-  size_type S0 ;
-
-  // This subview must be 2 == rank and 2 == rank_dynamic
-  // due to only having stride #0.
-  // The source dimension #0 must be non-zero for stride-one leading dimension.
-  // At most subsequent dimension can be non-zero.
-  // Return whether the subview introduced noncontiguity.
-  template< class S , class L >
-  KOKKOS_INLINE_FUNCTION
-  typename Impl::enable_if<( 2 == shape_type::rank &&
-                             2 == shape_type::rank_dynamic &&
-                             2 <= S::rank &&
-                             Impl::is_same<L,LayoutLeft>::value
-                           ), bool >::type
-  assign_subview( const ViewOffset<S,L,void> & rhs
-                , const size_t n0
-                , const size_t n1
-                , const size_t n2
-                , const size_t n3
-                , const size_t n4
-                , const size_t n5
-                , const size_t n6
-                , const size_t n7
-                )
-    {
-      // N1 = second non-zero dimension
-      // S0 = stride for second non-zero dimension
-      shape_type::N0 = n0 ;
-      shape_type::N1 = 0 ;
-      S0 = 0 ;
-
-      if      (                n1 ) { shape_type::N1 = n1 ; S0 = rhs.stride_1(); }
-      else if ( 2 < S::rank && n2 ) { shape_type::N1 = n2 ; S0 = rhs.stride_2(); }
-      else if ( 3 < S::rank && n3 ) { shape_type::N1 = n3 ; S0 = rhs.stride_3(); }
-      else if ( 4 < S::rank && n4 ) { shape_type::N1 = n4 ; S0 = rhs.stride_4(); }
-      else if ( 5 < S::rank && n5 ) { shape_type::N1 = n5 ; S0 = rhs.stride_5(); }
-      else if ( 6 < S::rank && n6 ) { shape_type::N1 = n6 ; S0 = rhs.stride_6(); }
-      else if ( 7 < S::rank && n7 ) { shape_type::N1 = n7 ; S0 = rhs.stride_7(); }
-
-      // Introduce noncontiguity if change the first dimension
-      // or took a range of a dimension after the second.
-      return ( size_t(shape_type::N0) != size_t(rhs.N0) ) || ( 0 == n1 );
-    }
-
-
-  template< unsigned R >
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t n )
-    { assign_shape_dimension<R>( *this , n ); }
-
-
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3
-             , size_t n4 , size_t n5 , size_t n6 , size_t n7
-             , size_t = 0 )
-    { shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); S0 = shape_type::N0 ; }
-
-  template< class ShapeRHS >
-  KOKKOS_INLINE_FUNCTION
-  void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs
-             , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) == 0
-                                  )>::type * = 0 )
-    {
-      shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 );
-      S0 = shape_type::N0 ; // No padding when dynamic_rank == 0
-    }
-
-  template< class ShapeRHS >
-  KOKKOS_INLINE_FUNCTION
-  void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs
-             , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) > 0
-                                  )>::type * = 0 )
-    {
-      shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 );
-      S0 = rhs.S0 ; // possibly padding when dynamic rank > 0
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  void set_padding()
-    {
-      enum { div   = MEMORY_ALIGNMENT / shape_type::scalar_size };
-      enum { mod   = MEMORY_ALIGNMENT % shape_type::scalar_size };
-      enum { align = 0 == mod ? div : 0 };
-
-      if ( align && MEMORY_ALIGNMENT_THRESHOLD * align < S0 ) {
-
-        const size_type count_mod = S0 % ( div ? div : 1 );
-
-        if ( count_mod ) { S0 += align - count_mod ; }
-      }
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type cardinality() const
-    { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type capacity() const
-    { return size_type(S0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; }
-
-  // Stride with [ rank ] as total length
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    {
-      s[0] = 1 ;
-      if ( 0 < shape_type::rank ) { s[1] = S0 ; }
-      if ( 1 < shape_type::rank ) { s[2] = s[1] * shape_type::N1 ; }
-      if ( 2 < shape_type::rank ) { s[3] = s[2] * shape_type::N2 ; }
-      if ( 3 < shape_type::rank ) { s[4] = s[3] * shape_type::N3 ; }
-      if ( 4 < shape_type::rank ) { s[5] = s[4] * shape_type::N4 ; }
-      if ( 5 < shape_type::rank ) { s[6] = s[5] * shape_type::N5 ; }
-      if ( 6 < shape_type::rank ) { s[7] = s[6] * shape_type::N6 ; }
-      if ( 7 < shape_type::rank ) { s[8] = s[7] * shape_type::N7 ; }
-    }
-
-  KOKKOS_INLINE_FUNCTION size_type stride_0() const { return 1 ; }
-  KOKKOS_INLINE_FUNCTION size_type stride_1() const { return S0 ; }
-  KOKKOS_INLINE_FUNCTION size_type stride_2() const { return S0 * shape_type::N1 ; }
-  KOKKOS_INLINE_FUNCTION size_type stride_3() const { return S0 * shape_type::N1 * shape_type::N2 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_4() const
-    { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_5() const
-    { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_6() const
-    { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_7() const
-    { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 ; }
-
-  // rank 2
-  template < typename I0 , typename I1 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const & i0 , I1 const & i1) const
-    { return i0 + S0 * i1 ; }
-
-  //rank 3
-  template <typename I0, typename I1, typename I2>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const
-    {
-      return i0 + S0 * (
-             i1 + shape_type::N1 * i2 );
-    }
-
-  //rank 4
-  template <typename I0, typename I1, typename I2, typename I3>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3 ) const
-    {
-      return i0 + S0 * (
-             i1 + shape_type::N1 * (
-             i2 + shape_type::N2 * i3 ));
-    }
-
-  //rank 5
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4 ) const
-    {
-      return i0 + S0 * (
-             i1 + shape_type::N1 * (
-             i2 + shape_type::N2 * (
-             i3 + shape_type::N3 * i4 )));
-    }
-
-  //rank 6
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5 ) const
-    {
-      return i0 + S0 * (
-             i1 + shape_type::N1 * (
-             i2 + shape_type::N2 * (
-             i3 + shape_type::N3 * (
-             i4 + shape_type::N4 * i5 ))));
-    }
-
-  //rank 7
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const
-  {
-    return i0 + S0 * (
-           i1 + shape_type::N1 * (
-           i2 + shape_type::N2 * (
-           i3 + shape_type::N3 * (
-           i4 + shape_type::N4 * (
-           i5 + shape_type::N5 * i6 )))));
-  }
-
-  //rank 8
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const
-  {
-    return i0 + S0 * (
-           i1 + shape_type::N1 * (
-           i2 + shape_type::N2 * (
-           i3 + shape_type::N3 * (
-           i4 + shape_type::N4 * (
-           i5 + shape_type::N5 * (
-           i6 + shape_type::N6 * i7 ))))));
-  }
-};
-
-//----------------------------------------------------------------------------
-// LayoutRight AND ( 1 >= rank OR 1 >= rank_dynamic ) : no padding / striding
-template < class ShapeType >
-struct ViewOffset< ShapeType , LayoutRight
-                 , typename enable_if<( 1 >= ShapeType::rank
-                                        ||
-                                        1 >= ShapeType::rank_dynamic
-                                      )>::type >
-  : public ShapeType
-{
-  typedef size_t       size_type;
-  typedef ShapeType    shape_type;
-  typedef LayoutRight  array_layout ;
-
-  enum { has_padding = false };
-
-  // This subview must be 1 == rank and 1 == rank_dynamic
-  // The source view's last dimension must be non-zero
-  // Return whether the subview introduced noncontiguity
-  template< class S , class L >
-  KOKKOS_INLINE_FUNCTION
-  typename Impl::enable_if<( 0 == shape_type::rank &&
-                             Impl::is_same<L,LayoutRight>::value
-                           ), bool >::type
-  assign_subview( const ViewOffset<S,L,void> &
-                , const size_t n0
-                , const size_t n1
-                , const size_t n2
-                , const size_t n3
-                , const size_t n4
-                , const size_t n5
-                , const size_t n6
-                , const size_t n7
-                )
-    { return false ; }
-
-  // This subview must be 1 == rank and 1 == rank_dynamic
-  // The source view's last dimension must be non-zero
-  // Return whether the subview introduced noncontiguity
-  template< class S , class L >
-  KOKKOS_INLINE_FUNCTION
-  typename Impl::enable_if<( 1 == shape_type::rank &&
-                             1 == shape_type::rank_dynamic &&
-                             1 <= S::rank &&
-                             Impl::is_same<L,LayoutRight>::value
-                           ), bool >::type
-  assign_subview( const ViewOffset<S,L,void> &
-                , const size_t n0
-                , const size_t n1
-                , const size_t n2
-                , const size_t n3
-                , const size_t n4
-                , const size_t n5
-                , const size_t n6
-                , const size_t n7
-                )
-    {
-      shape_type::N0 = S::rank == 1 ? n0 : (
-                       S::rank == 2 ? n1 : (
-                       S::rank == 3 ? n2 : (
-                       S::rank == 4 ? n3 : (
-                       S::rank == 5 ? n4 : (
-                       S::rank == 6 ? n5 : (
-                       S::rank == 7 ? n6 : n7 ))))));
-      // should have n0 .. n_(rank-2) equal zero
-      return false ;
-    }
-
-  template< unsigned R >
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t n )
-    { assign_shape_dimension<R>( *this , n ); }
-
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3
-             , size_t n4 , size_t n5 , size_t n6 , size_t n7
-             , size_t = 0 )
-    { shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); }
-
-  template< class ShapeRHS >
-  KOKKOS_INLINE_FUNCTION
-  void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs
-             , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic)
-                                  )>::type * = 0 )
-    { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); }
-
-  template< class ShapeRHS >
-  KOKKOS_INLINE_FUNCTION
-  void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs
-             , typename enable_if<( 1 == int(ShapeRHS::rank)
-                                    &&
-                                    1 == int(shape_type::rank)
-                                    &&
-                                    1 == int(shape_type::rank_dynamic)
-                                  )>::type * = 0 )
-    { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); }
-
-  KOKKOS_INLINE_FUNCTION
-  void set_padding() {}
-
-  KOKKOS_INLINE_FUNCTION
-  size_type cardinality() const
-    { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type capacity() const
-    { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; }
-
-  size_type stride_R() const
-    {
-      return size_type(shape_type::N1) * shape_type::N2 * shape_type::N3 *
-             shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ;
-    };
-
-  // Stride with [rank] as total length
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    {
-      size_type n = 1 ;
-      if ( 7 < shape_type::rank ) { s[7] = n ; n *= shape_type::N7 ; }
-      if ( 6 < shape_type::rank ) { s[6] = n ; n *= shape_type::N6 ; }
-      if ( 5 < shape_type::rank ) { s[5] = n ; n *= shape_type::N5 ; }
-      if ( 4 < shape_type::rank ) { s[4] = n ; n *= shape_type::N4 ; }
-      if ( 3 < shape_type::rank ) { s[3] = n ; n *= shape_type::N3 ; }
-      if ( 2 < shape_type::rank ) { s[2] = n ; n *= shape_type::N2 ; }
-      if ( 1 < shape_type::rank ) { s[1] = n ; n *= shape_type::N1 ; }
-      if ( 0 < shape_type::rank ) { s[0] = n ; }
-      s[shape_type::rank] = n * shape_type::N0 ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_7() const { return 1 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_6() const { return shape_type::N7 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_5() const { return shape_type::N7 * shape_type::N6 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_4() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_3() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_2() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_1() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 * shape_type::N2 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_0() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 * shape_type::N2 * shape_type::N1 ; }
-
-  // rank 1
-  template <typename I0>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0) const
-    {
-      return i0 ;
-    }
-
-  // rank 2
-  template <typename I0, typename I1>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1 ) const
-    {
-      return i1 + shape_type::N1 * i0 ;
-    }
-
-  template <typename I0, typename I1, typename I2>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const
-    {
-      return i2 + shape_type::N2 * (
-             i1 + shape_type::N1 * ( i0 ));
-    }
-
-  template <typename I0, typename I1, typename I2, typename I3>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3 ) const
-    {
-      return i3 + shape_type::N3 * (
-             i2 + shape_type::N2 * (
-             i1 + shape_type::N1 * ( i0 )));
-    }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4 ) const
-    {
-      return i4 + shape_type::N4 * (
-             i3 + shape_type::N3 * (
-             i2 + shape_type::N2 * (
-             i1 + shape_type::N1 * ( i0 ))));
-    }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5 ) const
-  {
-    return i5 + shape_type::N5 * (
-           i4 + shape_type::N4 * (
-           i3 + shape_type::N3 * (
-           i2 + shape_type::N2 * (
-           i1 + shape_type::N1 * ( i0 )))));
-  }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const
-  {
-    return i6 + shape_type::N6 * (
-           i5 + shape_type::N5 * (
-           i4 + shape_type::N4 * (
-           i3 + shape_type::N3 * (
-           i2 + shape_type::N2 * (
-           i1 + shape_type::N1 * ( i0 ))))));
-  }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const
-  {
-    return i7 + shape_type::N7 * (
-           i6 + shape_type::N6 * (
-           i5 + shape_type::N5 * (
-           i4 + shape_type::N4 * (
-           i3 + shape_type::N3 * (
-           i2 + shape_type::N2 * (
-           i1 + shape_type::N1 * ( i0 )))))));
-  }
-};
-
-//----------------------------------------------------------------------------
-// LayoutRight AND ( 1 < rank AND 1 < rank_dynamic ) : has padding / striding
-template < class ShapeType >
-struct ViewOffset< ShapeType , LayoutRight
-                 , typename enable_if<( 1 < ShapeType::rank
-                                        &&
-                                        1 < ShapeType::rank_dynamic
-                                      )>::type >
-  : public ShapeType
-{
-  typedef size_t       size_type;
-  typedef ShapeType    shape_type;
-  typedef LayoutRight  array_layout ;
-
-  enum { has_padding = true };
-
-  size_type SR ;
-
-  // This subview must be 2 == rank and 2 == rank_dynamic
-  // due to only having stride #(rank-1).
-  // The source dimension #(rank-1) must be non-zero for stride-one leading dimension.
-  // At most one prior dimension can be non-zero.
-  // Return whether the subview introduced noncontiguity.
-  template< class S , class L >
-  KOKKOS_INLINE_FUNCTION
-  typename Impl::enable_if<( 2 == shape_type::rank &&
-                             2 == shape_type::rank_dynamic &&
-                             2 <= S::rank &&
-                             Impl::is_same<L,LayoutRight>::value
-                           ), bool >::type
-  assign_subview( const ViewOffset<S,L,void> & rhs
-                , const size_t n0
-                , const size_t n1
-                , const size_t n2
-                , const size_t n3
-                , const size_t n4
-                , const size_t n5
-                , const size_t n6
-                , const size_t n7
-                )
-    {
-      const size_type nR = S::rank == 2 ? n1 : (
-                           S::rank == 3 ? n2 : (
-                           S::rank == 4 ? n3 : (
-                           S::rank == 5 ? n4 : (
-                           S::rank == 6 ? n5 : (
-                           S::rank == 7 ? n6 : n7 )))));
-
-      // N0 = first non-zero-dimension
-      // N1 = last non-zero dimension
-      // SR = stride for second non-zero dimension
-      shape_type::N0 = 0 ;
-      shape_type::N1 = nR ;
-      SR = 0 ;
-
-      if      (                n0 ) { shape_type::N0 = n0 ; SR = rhs.stride_0(); }
-      else if ( 2 < S::rank && n1 ) { shape_type::N0 = n1 ; SR = rhs.stride_1(); }
-      else if ( 3 < S::rank && n2 ) { shape_type::N0 = n2 ; SR = rhs.stride_2(); }
-      else if ( 4 < S::rank && n3 ) { shape_type::N0 = n3 ; SR = rhs.stride_3(); }
-      else if ( 5 < S::rank && n4 ) { shape_type::N0 = n4 ; SR = rhs.stride_4(); }
-      else if ( 6 < S::rank && n5 ) { shape_type::N0 = n5 ; SR = rhs.stride_5(); }
-      else if ( 7 < S::rank && n6 ) { shape_type::N0 = n6 ; SR = rhs.stride_6(); }
-
-      // Introduce noncontiguous if change the last dimension
-      // or take a range of a dimension other than the second-to-last dimension.
-
-      return 2 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N1) || 0 == n0 ) : (
-             3 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N2) || 0 == n1 ) : (
-             4 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N3) || 0 == n2 ) : (
-             5 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N4) || 0 == n3 ) : (
-             6 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N5) || 0 == n4 ) : (
-             7 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N6) || 0 == n5 ) : (
-                            ( size_t(shape_type::N1) != size_t(rhs.N7) || 0 == n6 ) ))))));
-    }
-
-  template< unsigned R >
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t n )
-    { assign_shape_dimension<R>( *this , n ); }
-
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3
-             , size_t n4 , size_t n5 , size_t n6 , size_t n7
-             , size_t = 0 )
-    {
-      shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 );
-      SR = size_type(shape_type::N1) * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ;
-    }
-
-  template< class ShapeRHS >
-  KOKKOS_INLINE_FUNCTION
-  void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs
-             , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) <= 1
-                                  )>::type * = 0 )
-    {
-      shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 );
-      SR = shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ;
-    }
-
-  template< class ShapeRHS >
-  KOKKOS_INLINE_FUNCTION
-  void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs
-             , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic)
-                                    &&
-                                    int(ShapeRHS::rank_dynamic) > 1
-                                  )>::type * = 0 )
-    {
-      shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 );
-      SR = rhs.SR ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  void set_padding()
-    {
-      enum { div   = MEMORY_ALIGNMENT / shape_type::scalar_size };
-      enum { mod   = MEMORY_ALIGNMENT % shape_type::scalar_size };
-      enum { align = 0 == mod ? div : 0 };
-
-      if ( align && MEMORY_ALIGNMENT_THRESHOLD * align < SR ) {
-
-        const size_type count_mod = SR % ( div ? div : 1 );
-
-        if ( count_mod ) { SR += align - count_mod ; }
-      }
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type cardinality() const
-    { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type capacity() const { return shape_type::N0 * SR ; }
-
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    {
-      size_type n = 1 ;
-      if ( 7 < shape_type::rank ) { s[7] = n ; n *= shape_type::N7 ; }
-      if ( 6 < shape_type::rank ) { s[6] = n ; n *= shape_type::N6 ; }
-      if ( 5 < shape_type::rank ) { s[5] = n ; n *= shape_type::N5 ; }
-      if ( 4 < shape_type::rank ) { s[4] = n ; n *= shape_type::N4 ; }
-      if ( 3 < shape_type::rank ) { s[3] = n ; n *= shape_type::N3 ; }
-      if ( 2 < shape_type::rank ) { s[2] = n ; n *= shape_type::N2 ; }
-      if ( 1 < shape_type::rank ) { s[1] = n ; n *= shape_type::N1 ; }
-      if ( 0 < shape_type::rank ) { s[0] = SR ; }
-      s[shape_type::rank] = SR * shape_type::N0 ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_7() const { return 1 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_6() const { return shape_type::N7 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_5() const { return shape_type::N7 * shape_type::N6 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_4() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_3() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_2() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_1() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 * shape_type::N2 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_0() const { return SR ; }
-
-  // rank 2
-  template <typename I0, typename I1>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1 ) const
-    {
-      return i1 + i0 * SR ;
-    }
-
-  template <typename I0, typename I1, typename I2>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const
-    {
-      return i2 + shape_type::N2 * ( i1 ) +
-             i0 * SR ;
-    }
-
-  template <typename I0, typename I1, typename I2, typename I3>
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3 ) const
-    {
-      return i3 + shape_type::N3 * (
-             i2 + shape_type::N2 * ( i1 )) +
-             i0 * SR ;
-    }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4 ) const
-    {
-      return i4 + shape_type::N4 * (
-             i3 + shape_type::N3 * (
-             i2 + shape_type::N2 * ( i1 ))) +
-             i0 * SR ;
-    }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5 ) const
-  {
-    return i5 + shape_type::N5 * (
-           i4 + shape_type::N4 * (
-           i3 + shape_type::N3 * (
-           i2 + shape_type::N2 * ( i1 )))) +
-           i0 * SR ;
-  }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const
-  {
-    return i6 + shape_type::N6 * (
-           i5 + shape_type::N5 * (
-           i4 + shape_type::N4 * (
-           i3 + shape_type::N3 * (
-           i2 + shape_type::N2 * ( i1 ))))) +
-           i0 * SR ;
-  }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const
-  {
-    return i7 + shape_type::N7 * (
-           i6 + shape_type::N6 * (
-           i5 + shape_type::N5 * (
-           i4 + shape_type::N4 * (
-           i3 + shape_type::N3 * (
-           i2 + shape_type::N2 * ( i1 )))))) +
-           i0 * SR ;
-  }
-};
-
-//----------------------------------------------------------------------------
-// LayoutStride : 
-template < class ShapeType >
-struct ViewOffset< ShapeType , LayoutStride
-                 , typename enable_if<( 0 < ShapeType::rank )>::type >
-  : public ShapeType
-{
-  typedef size_t        size_type;
-  typedef ShapeType     shape_type;
-  typedef LayoutStride  array_layout ;
-
-  size_type S[ shape_type::rank + 1 ];
-
-  template< class SType , class L >
-  KOKKOS_INLINE_FUNCTION
-  bool assign_subview( const ViewOffset<SType,L,void> & rhs
-                     , const size_type n0
-                     , const size_type n1
-                     , const size_type n2
-                     , const size_type n3
-                     , const size_type n4
-                     , const size_type n5
-                     , const size_type n6
-                     , const size_type n7
-                     )
-    {
-      shape_type::assign( *this, 0,0,0,0, 0,0,0,0 );
-
-      for ( int i = 0 ; i < int(shape_type::rank+1) ; ++i ) { S[i] = 0 ; }
-
-      // preconditions:
-      //  shape_type::rank <= rhs.rank
-      //  shape_type::rank == count of nonzero( rhs_dim[i] )
-      size_type dim[8] = { n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 };
-      size_type str[ SType::rank + 1 ];
-
-      rhs.stride( str );
-
-      // contract the zero-dimensions
-      int r = 0 ;
-      for ( int i = 0 ; i < int(SType::rank) ; ++i ) {
-        if ( 0 != dim[i] ) {
-          dim[r] = dim[i] ;
-          str[r] = str[i] ;
-          ++r ;
-        }
-      }
-
-      if ( int(shape_type::rank) == r ) {
-        // The shape is non-zero
-        for ( int i = 0 ; i < int(shape_type::rank) ; ++i ) {
-          const size_type cap = dim[i] * ( S[i] = str[i] );
-          if ( S[ shape_type::rank ] < cap ) S[ shape_type::rank ] = cap ;
-        }
-        // set the contracted nonzero dimensions
-        shape_type::assign( *this, dim[0], dim[1], dim[2], dim[3], dim[4], dim[5], dim[6], dim[7] );
-      }
-
-      return true ; // definitely noncontiguous
-    }
-
-  template< unsigned R >
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t n )
-    { assign_shape_dimension<R>( *this , n ); }
-
-  template< class ShapeRHS , class Layout >
-  KOKKOS_INLINE_FUNCTION
-  void assign( const ViewOffset<ShapeRHS,Layout> & rhs
-             , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) )>::type * = 0 )
-    {
-      rhs.stride(S);
-      shape_type::assign( *this, rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 );
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  void assign( const LayoutStride & layout )
-  {
-    size_type max = 0 ;
-    for ( int i = 0 ; i < shape_type::rank ; ++i ) {
-      S[i] = layout.stride[i] ;
-      const size_type m = layout.dimension[i] * S[i] ;
-      if ( max < m ) { max = m ; }
-    }
-    S[ shape_type::rank ] = max ;
-    shape_type::assign( *this, layout.dimension[0], layout.dimension[1],
-                               layout.dimension[2], layout.dimension[3],
-                               layout.dimension[4], layout.dimension[5],
-                               layout.dimension[6], layout.dimension[7] );
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void assign( size_t s0 , size_t s1 , size_t s2 , size_t s3
-             , size_t s4 , size_t s5 , size_t s6 , size_t s7
-             , size_t s8 )
-    {
-      const size_t str[9] = { s0, s1, s2, s3, s4, s5, s6, s7, s8 };
-
-      // Last argument is the total length.
-      // Total length must be non-zero.
-      // All strides must be non-zero and less than total length.
-      bool ok = 0 < str[ shape_type::rank ] ;
-
-      for ( int i = 0 ; ( i < shape_type::rank ) &&
-                        ( ok = 0 < str[i] && str[i] < str[ shape_type::rank ] ); ++i );
-
-      if ( ok ) {
-        size_t dim[8] = { 1,1,1,1,1,1,1,1 }; 
-        int iorder[9] = { 0,0,0,0,0,0,0,0,0 }; 
-
-        // Ordering of strides smallest to largest.
-        for ( int i = 1 ; i < shape_type::rank ; ++i ) {
-          int j = i ;
-          for ( ; 0 < j && str[i] < str[ iorder[j-1] ] ; --j ) {
-            iorder[j] = iorder[j-1] ;
-          }
-          iorder[j] = i ;
-        }
-
-        // Last argument is the total length.
-        iorder[ shape_type::rank ] = shape_type::rank ;
-
-        // Determine dimension associated with each stride.
-        // Guarantees non-overlap by truncating dimension
-        // if ( 0 != str[ iorder[i+1] ] % str[ iorder[i] ] )
-        for ( int i = 0 ; i < shape_type::rank ; ++i ) {
-          dim[ iorder[i] ] = str[ iorder[i+1] ] / str[ iorder[i] ] ;
-        }
-
-        // Assign dimensions and strides:
-        shape_type::assign( *this, dim[0], dim[1], dim[2], dim[3], dim[4], dim[5], dim[6], dim[7] );
-        for ( int i = 0 ; i <= shape_type::rank ; ++i ) { S[i] = str[i] ; }
-      }
-      else {
-        shape_type::assign(*this,0,0,0,0,0,0,0,0);
-        for ( int i = 0 ; i <= shape_type::rank ; ++i ) { S[i] = 0 ; }
-      }
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  void set_padding() {}
-
-  KOKKOS_INLINE_FUNCTION
-  size_type cardinality() const
-    { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type capacity() const { return S[ shape_type::rank ]; }
-
-  template< typename iType >
-  KOKKOS_INLINE_FUNCTION
-  void stride( iType * const s ) const
-    { for ( int i = 0 ; i <= shape_type::rank ; ++i ) { s[i] = S[i] ; } }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_0() const { return S[0] ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_1() const { return S[1] ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_2() const { return S[2] ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_3() const { return S[3] ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_4() const { return S[4] ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_5() const { return S[5] ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_6() const { return S[6] ; }
-
-  KOKKOS_INLINE_FUNCTION
-  size_type stride_7() const { return S[7] ; }
-
-  // rank 1
-  template <typename I0 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==1),size_type>::type
-    operator()( I0 const& i0) const
-    {
-      return i0 * S[0] ;
-    }
-
-  // rank 2
-  template <typename I0, typename I1>
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==2),size_type>::type
-    operator()( I0 const& i0, I1 const& i1 ) const
-    {
-      return i0 * S[0] + i1 * S[1] ;
-    }
-
-  template <typename I0, typename I1, typename I2>
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==3),size_type>::type
-    operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const
-    {
-      return i0 * S[0] + i1 * S[1] + i2 * S[2] ;
-    }
-
-  template <typename I0, typename I1, typename I2, typename I3>
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==4),size_type>::type
-    operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3 ) const
-    {
-      return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] ;
-    }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==5),size_type>::type
-    operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4 ) const
-    {
-      return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] ;
-    }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==6),size_type>::type
-    operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5 ) const
-    {
-      return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] + i5 * S[5] ;
-    }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==7),size_type>::type
-    operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const
-    {
-      return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] + i5 * S[5] + i6 * S[6] ;
-    }
-
-  template < typename I0, typename I1, typename I2, typename I3
-            ,typename I4, typename I5, typename I6, typename I7 >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==8),size_type>::type
-    operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const
-    {
-      return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] + i5 * S[5] + i6 * S[6] + i7 * S[7] ;
-    }
-};
-
-//----------------------------------------------------------------------------
-
-template< class T >
-struct ViewOffsetRange {
-
-  enum { OK_integral_type = Impl::StaticAssert< Impl::is_integral<T>::value >::value };
-
-  enum { is_range = false };
-
-  KOKKOS_INLINE_FUNCTION static
-  size_t dimension( size_t const , T const & ) { return 0 ; }
-
-  KOKKOS_INLINE_FUNCTION static
-  size_t begin( T const & i ) { return size_t(i) ; }
-};
-
-template<>
-struct ViewOffsetRange<void> {
-  enum { is_range = false };
-};
-
-template<>
-struct ViewOffsetRange< Kokkos::ALL > {
-  enum { is_range = true };
-
-  KOKKOS_INLINE_FUNCTION static
-  size_t dimension( size_t const n , ALL const & ) { return n ; }
-
-  KOKKOS_INLINE_FUNCTION static
-  size_t begin( ALL const & ) { return 0 ; }
-};
-
-template< typename iType >
-struct ViewOffsetRange< std::pair<iType,iType> > {
-
-  enum { OK_integral_type = Impl::StaticAssert< Impl::is_integral<iType>::value >::value };
-
-  enum { is_range = true };
-
-  KOKKOS_INLINE_FUNCTION static
-  size_t dimension( size_t const n , std::pair<iType,iType> const & r )
-    { return ( size_t(r.first) < size_t(r.second) && size_t(r.second) <= n ) ? size_t(r.second) - size_t(r.first) : 0 ; }
-
-  KOKKOS_INLINE_FUNCTION static
-  size_t begin( std::pair<iType,iType> const & r ) { return size_t(r.first) ; }
-};
-
-template< typename iType >
-struct ViewOffsetRange< Kokkos::pair<iType,iType> > {
-
-  enum { OK_integral_type = Impl::StaticAssert< Impl::is_integral<iType>::value >::value };
-
-  enum { is_range = true };
-
-  KOKKOS_INLINE_FUNCTION static
-  size_t dimension( size_t const n , Kokkos::pair<iType,iType> const & r )
-    { return ( size_t(r.first) < size_t(r.second) && size_t(r.second) <= n ) ? size_t(r.second) - size_t(r.first) : 0 ; }
-
-  KOKKOS_INLINE_FUNCTION static
-  size_t begin( Kokkos::pair<iType,iType> const & r ) { return size_t(r.first) ; }
-};
-
-}} // namespace Kokkos::Impl
-
-#endif //KOKKOS_VIEWOFFSET_HPP
-
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp
deleted file mode 100644
index 8b63039f57000e9d3b0ffa2aaad5a0c3c94d27c4..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_VIEWSUPPORT_HPP
-#define KOKKOS_VIEWSUPPORT_HPP
-
-#include <algorithm>
-#include <Kokkos_ExecPolicy.hpp>
-#include <impl/Kokkos_Shape.hpp>
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-/** \brief  Evaluate if LHS = RHS view assignment is allowed. */
-template< class ViewLHS , class ViewRHS >
-struct ViewAssignable
-{
-  // Same memory space.
-  // Same value type.
-  // Compatible 'const' qualifier
-  // Cannot assign managed = unmannaged
-  enum { assignable_value =
-    ( is_same< typename ViewLHS::value_type ,
-               typename ViewRHS::value_type >::value
-      ||
-      is_same< typename ViewLHS::value_type ,
-               typename ViewRHS::const_value_type >::value )
-    &&
-    is_same< typename ViewLHS::memory_space ,
-             typename ViewRHS::memory_space >::value
-    &&
-    ( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) )
-  };
-
-  enum { assignable_shape =
-    // Compatible shape and matching layout:
-    ( ShapeCompatible< typename ViewLHS::shape_type ,
-                       typename ViewRHS::shape_type >::value
-      &&
-      is_same< typename ViewLHS::array_layout ,
-               typename ViewRHS::array_layout >::value )
-    ||
-    // Matching layout, same rank, and LHS dynamic rank
-    ( is_same< typename ViewLHS::array_layout ,
-               typename ViewRHS::array_layout >::value
-      &&
-      int(ViewLHS::rank) == int(ViewRHS::rank)
-      &&
-      int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) )
-    ||
-    // Both rank-0, any shape and layout
-    ( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 )
-    ||
-    // Both rank-1 and LHS is dynamic rank-1, any shape and layout
-    ( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 &&
-      int(ViewLHS::rank_dynamic) == 1 )
-    };
-
-  enum { value = assignable_value && assignable_shape };
-};
-
-} // namespace Impl
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template< class ExecSpace , class Type , bool Initialize >
-struct ViewDefaultConstruct
-{ ViewDefaultConstruct( Type * , size_t ) {} };
-
-} // namespace Impl
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template< class OutputView , class InputView  , unsigned Rank = OutputView::Rank >
-struct ViewRemap
-{
-  typedef typename OutputView::size_type   size_type ;
-
-  const OutputView output ;
-  const InputView  input ;
-  const size_type n0 ;
-  const size_type n1 ;
-  const size_type n2 ;
-  const size_type n3 ;
-  const size_type n4 ;
-  const size_type n5 ;
-  const size_type n6 ;
-  const size_type n7 ;
-
-  ViewRemap( const OutputView & arg_out , const InputView & arg_in )
-    : output( arg_out ), input( arg_in )
-    , n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
-    , n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
-    , n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
-    , n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
-    , n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
-    , n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
-    , n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
-    , n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
-    {
-      typedef typename OutputView::execution_space execution_space ;
-      Kokkos::RangePolicy< execution_space > range( 0 , n0 );
-      parallel_for( range , *this );
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const size_type i0 ) const
-  {
-    for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) {
-    for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) {
-    for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) {
-    for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) {
-    for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) {
-    for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) {
-    for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) {
-      output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7);
-    }}}}}}}
-  }
-};
-
-template< class OutputView , class InputView  >
-struct ViewRemap< OutputView ,  InputView , 0 >
-{
-  typedef typename OutputView::value_type   value_type ;
-  typedef typename OutputView::memory_space dst_space ;
-  typedef typename InputView ::memory_space src_space ;
-
-  ViewRemap( const OutputView & arg_out , const InputView & arg_in )
-  {
-    DeepCopy< dst_space , src_space >( arg_out.ptr_on_device() ,
-                                       arg_in.ptr_on_device() ,
-                                       sizeof(value_type) );
-  }
-};
-
-//----------------------------------------------------------------------------
-
-template< class ExecSpace , class Type >
-struct ViewDefaultConstruct< ExecSpace , Type , true >
-{
-  Type * const m_ptr ;
-
-  KOKKOS_FORCEINLINE_FUNCTION
-  void operator()( const typename ExecSpace::size_type& i ) const
-    { m_ptr[i] = Type(); }
-
-  ViewDefaultConstruct( Type * pointer , size_t capacity )
-    : m_ptr( pointer )
-    {
-      Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
-      parallel_for( range , *this );
-      ExecSpace::fence();
-    }
-};
-
-template< class OutputView , unsigned Rank = OutputView::Rank ,
-          class Enabled = void >
-struct ViewFill
-{
-  typedef typename OutputView::const_value_type  const_value_type ;
-  typedef typename OutputView::size_type         size_type ;
-
-  const OutputView output ;
-  const_value_type input ;
-
-  ViewFill( const OutputView & arg_out , const_value_type & arg_in )
-    : output( arg_out ), input( arg_in )
-    {
-      typedef typename OutputView::execution_space execution_space ;
-      Kokkos::RangePolicy< execution_space > range( 0 , output.dimension_0() );
-      parallel_for( range , *this );
-      execution_space::fence();
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const size_type i0 ) const
-  {
-    for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) {
-    for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) {
-    for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) {
-    for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) {
-    for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) {
-    for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) {
-    for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) {
-      output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ;
-    }}}}}}}
-  }
-};
-
-template< class OutputView >
-struct ViewFill< OutputView , 0 >
-{
-  typedef typename OutputView::const_value_type  const_value_type ;
-  typedef typename OutputView::memory_space      dst_space ;
-
-  ViewFill( const OutputView & arg_out , const_value_type & arg_in )
-  {
-    DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in ,
-                                       sizeof(const_value_type) );
-  }
-};
-
-} // namespace Impl
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-
-struct ViewAllocateWithoutInitializing {
-
-  const std::string label ;
-
-  ViewAllocateWithoutInitializing() : label() {}
-  explicit ViewAllocateWithoutInitializing( const std::string & arg_label ) : label( arg_label ) {}
-  explicit ViewAllocateWithoutInitializing( const char * const  arg_label ) : label( arg_label ) {}
-};
-
-struct ViewAllocate {
-
-  const std::string  label ;
-
-  ViewAllocate() : label() {}
-  ViewAllocate( const std::string & arg_label ) : label( arg_label ) {}
-  ViewAllocate( const char * const  arg_label ) : label( arg_label ) {}
-};
-
-}
-
-namespace Kokkos {
-namespace Impl {
-
-template< class Traits , class AllocationProperties , class Enable = void >
-struct ViewAllocProp : public Kokkos::Impl::false_type {};
-
-template< class Traits >
-struct ViewAllocProp< Traits , Kokkos::ViewAllocate
-  , typename Kokkos::Impl::enable_if<(
-      Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
-    )>::type >
-  : public Kokkos::Impl::true_type
-{
-  typedef size_t               size_type ;
-  typedef const ViewAllocate & property_type ;
-
-  enum { Initialize = true };
-  enum { AllowPadding = false };
-
-  inline
-  static const std::string & label( property_type p ) { return p.label ; }
-};
-
-template< class Traits >
-struct ViewAllocProp< Traits , std::string
-  , typename Kokkos::Impl::enable_if<(
-      Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
-    )>::type >
-  : public Kokkos::Impl::true_type
-{
-  typedef size_t              size_type ;
-  typedef const std::string & property_type ;
-
-  enum { Initialize = true };
-  enum { AllowPadding = false };
-
-  inline
-  static const std::string & label( property_type s ) { return s ; }
-};
-
-template< class Traits , unsigned N >
-struct ViewAllocProp< Traits , char[N]
-  , typename Kokkos::Impl::enable_if<(
-      Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
-    )>::type >
-  : public Kokkos::Impl::true_type
-{
-private:
-  typedef char label_type[N] ;
-public:
-
-  typedef size_t             size_type ;
-  typedef const label_type & property_type ;
-
-  enum { Initialize = true };
-  enum { AllowPadding = false };
-
-  inline
-  static std::string label( property_type s ) { return std::string(s) ; }
-};
-
-template< class Traits >
-struct ViewAllocProp< Traits , Kokkos::ViewAllocateWithoutInitializing
-  , typename Kokkos::Impl::enable_if<(
-      Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
-    )>::type >
-  : public Kokkos::Impl::true_type
-{
-  typedef size_t size_type ;
-  typedef const Kokkos::ViewAllocateWithoutInitializing & property_type ;
-
-  enum { Initialize = false };
-  enum { AllowPadding = false };
-
-  inline
-  static std::string label( property_type s ) { return s.label ; }
-};
-
-} // namespace Impl
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template< class Traits , class PointerProperties , class Enable = void >
-struct ViewRawPointerProp : public Kokkos::Impl::false_type {};
-
-template< class Traits , typename T >
-struct ViewRawPointerProp< Traits , T ,
-  typename Kokkos::Impl::enable_if<(
-    Impl::is_same< T , typename Traits::value_type >::value ||
-    Impl::is_same< T , typename Traits::non_const_value_type >::value
-  )>::type >
-  : public Kokkos::Impl::true_type
-{
-  typedef size_t size_type ;
-};
-
-} // namespace Impl
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */
-
-
diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp
similarity index 92%
rename from lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp
rename to lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp
index 8b3749e853a85eea341c4ce8462aec755de4bb11..ecbcf72fe0b6ad92b6ec074f7a1b6b5dcca3322a 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp
@@ -145,9 +145,9 @@ public:
   //----------------------------------------
 
   ~ViewOffset() = default ;
-  ViewOffset() = default ;
-  ViewOffset( const ViewOffset & ) = default ;
-  ViewOffset & operator = ( const ViewOffset & ) = default ;
+  KOKKOS_INLINE_FUNCTION ViewOffset() = default ;
+  KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default ;
+  KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default ;
 
   template< unsigned TrivialScalarSize >
   KOKKOS_INLINE_FUNCTION
@@ -163,15 +163,15 @@ template< typename T , unsigned N0 , unsigned N1 , class ... P
         >
 struct ViewMapping
   < void
-  , Kokkos::Experimental::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...>
+  , Kokkos::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...>
   , Kokkos::LayoutTileLeft<N0,N1,true>
   , iType0
   , iType1 >
 {
   typedef Kokkos::LayoutTileLeft<N0,N1,true>  src_layout ;
-  typedef Kokkos::Experimental::ViewTraits< T** , src_layout , P... > src_traits ;
-  typedef Kokkos::Experimental::ViewTraits< T[N0][N1] , LayoutLeft , P ... > traits ;
-  typedef Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P ... > type ;
+  typedef Kokkos::ViewTraits< T** , src_layout , P... > src_traits ;
+  typedef Kokkos::ViewTraits< T[N0][N1] , LayoutLeft , P ... > traits ;
+  typedef Kokkos::View< T[N0][N1] , LayoutLeft , P ... > type ;
 
   KOKKOS_INLINE_FUNCTION static
   void assign( ViewMapping< traits , void > & dst
@@ -203,8 +203,8 @@ namespace Experimental {
 
 template< typename T , unsigned N0 , unsigned N1 , class ... P >
 KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
-tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src
+Kokkos::View< T[N0][N1] , LayoutLeft , P... >
+tile_subview( const Kokkos::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src
             , const size_t i_tile0
             , const size_t i_tile1
             )
@@ -213,7 +213,7 @@ tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,
   // by using the first subview argument as the layout.
   typedef Kokkos::LayoutTileLeft<N0,N1,true> SrcLayout ;
 
-  return Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
+  return Kokkos::View< T[N0][N1] , LayoutLeft , P... >
     ( src , SrcLayout() , i_tile0 , i_tile1 );
 }
 
diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt
index 5bb2b672e124f3b282d760562514afb1719fd957..795657fe876233c8ef7f962bdce12be4d0452e2f 100644
--- a/lib/kokkos/core/unit_test/CMakeLists.txt
+++ b/lib/kokkos/core/unit_test/CMakeLists.txt
@@ -17,12 +17,33 @@ TRIBITS_ADD_LIBRARY(
 #
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
 
 IF(Kokkos_ENABLE_Serial)
   TRIBITS_ADD_EXECUTABLE_AND_TEST(
     UnitTest_Serial
-    SOURCES UnitTestMain.cpp TestSerial.cpp
+    SOURCES
+      UnitTestMain.cpp
+      serial/TestSerial_Atomics.cpp
+      serial/TestSerial_Other.cpp
+      serial/TestSerial_Reductions.cpp
+      serial/TestSerial_SubView_a.cpp
+      serial/TestSerial_SubView_b.cpp
+      serial/TestSerial_SubView_c01.cpp
+      serial/TestSerial_SubView_c02.cpp
+      serial/TestSerial_SubView_c03.cpp
+      serial/TestSerial_SubView_c04.cpp
+      serial/TestSerial_SubView_c05.cpp
+      serial/TestSerial_SubView_c06.cpp
+      serial/TestSerial_SubView_c07.cpp
+      serial/TestSerial_SubView_c08.cpp
+      serial/TestSerial_SubView_c09.cpp
+      serial/TestSerial_SubView_c10.cpp
+      serial/TestSerial_SubView_c11.cpp
+      serial/TestSerial_SubView_c12.cpp
+      serial/TestSerial_Team.cpp
+      serial/TestSerial_ViewAPI_a.cpp
+      serial/TestSerial_ViewAPI_b.cpp
     COMM serial mpi
     NUM_MPI_PROCS 1
     FAIL_REGULAR_EXPRESSION "  FAILED  "
@@ -33,7 +54,28 @@ ENDIF()
 IF(Kokkos_ENABLE_Pthread)
   TRIBITS_ADD_EXECUTABLE_AND_TEST(
     UnitTest_Threads
-    SOURCES UnitTestMain.cpp TestThreads.cpp
+    SOURCES
+      UnitTestMain.cpp
+      threads/TestThreads_Atomics.cpp
+      threads/TestThreads_Other.cpp
+      threads/TestThreads_Reductions.cpp
+      threads/TestThreads_SubView_a.cpp
+      threads/TestThreads_SubView_b.cpp
+      threads/TestThreads_SubView_c01.cpp
+      threads/TestThreads_SubView_c02.cpp
+      threads/TestThreads_SubView_c03.cpp
+      threads/TestThreads_SubView_c04.cpp
+      threads/TestThreads_SubView_c05.cpp
+      threads/TestThreads_SubView_c06.cpp
+      threads/TestThreads_SubView_c07.cpp
+      threads/TestThreads_SubView_c08.cpp
+      threads/TestThreads_SubView_c09.cpp
+      threads/TestThreads_SubView_c10.cpp
+      threads/TestThreads_SubView_c11.cpp
+      threads/TestThreads_SubView_c12.cpp
+      threads/TestThreads_Team.cpp
+      threads/TestThreads_ViewAPI_a.cpp
+      threads/TestThreads_ViewAPI_b.cpp
     COMM serial mpi
     NUM_MPI_PROCS 1
     FAIL_REGULAR_EXPRESSION "  FAILED  "
@@ -44,7 +86,28 @@ ENDIF()
 IF(Kokkos_ENABLE_OpenMP)
   TRIBITS_ADD_EXECUTABLE_AND_TEST(
     UnitTest_OpenMP
-    SOURCES UnitTestMain.cpp TestOpenMP.cpp TestOpenMP_a.cpp TestOpenMP_b.cpp TestOpenMP_c.cpp
+    SOURCES
+      UnitTestMain.cpp
+      openmp/TestOpenMP_Atomics.cpp
+      openmp/TestOpenMP_Other.cpp
+      openmp/TestOpenMP_Reductions.cpp
+      openmp/TestOpenMP_SubView_a.cpp
+      openmp/TestOpenMP_SubView_b.cpp
+      openmp/TestOpenMP_SubView_c01.cpp
+      openmp/TestOpenMP_SubView_c02.cpp
+      openmp/TestOpenMP_SubView_c03.cpp
+      openmp/TestOpenMP_SubView_c04.cpp
+      openmp/TestOpenMP_SubView_c05.cpp
+      openmp/TestOpenMP_SubView_c06.cpp
+      openmp/TestOpenMP_SubView_c07.cpp
+      openmp/TestOpenMP_SubView_c08.cpp
+      openmp/TestOpenMP_SubView_c09.cpp
+      openmp/TestOpenMP_SubView_c10.cpp
+      openmp/TestOpenMP_SubView_c11.cpp
+      openmp/TestOpenMP_SubView_c12.cpp
+      openmp/TestOpenMP_Team.cpp
+      openmp/TestOpenMP_ViewAPI_a.cpp
+      openmp/TestOpenMP_ViewAPI_b.cpp
     COMM serial mpi
     NUM_MPI_PROCS 1
     FAIL_REGULAR_EXPRESSION "  FAILED  "
@@ -66,7 +129,36 @@ ENDIF()
 IF(Kokkos_ENABLE_Cuda)
   TRIBITS_ADD_EXECUTABLE_AND_TEST(
     UnitTest_Cuda
-    SOURCES UnitTestMain.cpp TestCuda.cpp TestCuda_a.cpp TestCuda_b.cpp TestCuda_c.cpp
+    SOURCES
+      UnitTestMain.cpp
+      cuda/TestCuda_Atomics.cpp
+      cuda/TestCuda_Other.cpp
+      cuda/TestCuda_Reductions_a.cpp
+      cuda/TestCuda_Reductions_b.cpp
+      cuda/TestCuda_Spaces.cpp
+      cuda/TestCuda_SubView_a.cpp
+      cuda/TestCuda_SubView_b.cpp
+      cuda/TestCuda_SubView_c01.cpp
+      cuda/TestCuda_SubView_c02.cpp
+      cuda/TestCuda_SubView_c03.cpp
+      cuda/TestCuda_SubView_c04.cpp
+      cuda/TestCuda_SubView_c05.cpp
+      cuda/TestCuda_SubView_c06.cpp
+      cuda/TestCuda_SubView_c07.cpp
+      cuda/TestCuda_SubView_c08.cpp
+      cuda/TestCuda_SubView_c09.cpp
+      cuda/TestCuda_SubView_c10.cpp
+      cuda/TestCuda_SubView_c11.cpp
+      cuda/TestCuda_SubView_c12.cpp
+      cuda/TestCuda_Team.cpp
+      cuda/TestCuda_ViewAPI_a.cpp
+      cuda/TestCuda_ViewAPI_b.cpp
+      cuda/TestCuda_ViewAPI_c.cpp
+      cuda/TestCuda_ViewAPI_d.cpp
+      cuda/TestCuda_ViewAPI_e.cpp
+      cuda/TestCuda_ViewAPI_f.cpp
+      cuda/TestCuda_ViewAPI_g.cpp
+      cuda/TestCuda_ViewAPI_h.cpp
     COMM serial mpi
     NUM_MPI_PROCS 1
     FAIL_REGULAR_EXPRESSION "  FAILED  "
diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile
index 3d9d212c1ecdef658fdb9cf7d30fc542a6fb72d3..3203dec28c114ee4b7b0ca457dc3cb5cf2787708 100644
--- a/lib/kokkos/core/unit_test/Makefile
+++ b/lib/kokkos/core/unit_test/Makefile
@@ -3,50 +3,92 @@ KOKKOS_PATH = ../..
 GTEST_PATH = ../../tpls/gtest
 
 vpath %.cpp ${KOKKOS_PATH}/core/unit_test
+vpath %.cpp ${KOKKOS_PATH}/core/unit_test/serial
+vpath %.cpp ${KOKKOS_PATH}/core/unit_test/threads
+vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmp
+vpath %.cpp ${KOKKOS_PATH}/core/unit_test/cuda
+
 TEST_HEADERS = $(wildcard $(KOKKOS_PATH)/core/unit_test/*.hpp)
+TEST_HEADERS += $(wildcard $(KOKKOS_PATH)/core/unit_test/*/*.hpp)
 
 default: build_all
 	echo "End Build"
 
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+  CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
 else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?= -lpthread
+  CXX = g++
 endif
 
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?= -lpthread
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/unit_test
 
 TEST_TARGETS =
 TARGETS =
 
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	OBJ_CUDA = TestCuda_c.o TestCuda_b.o TestCuda_a.o TestCuda.o UnitTestMain.o gtest-all.o
+	OBJ_CUDA = TestCuda_Other.o TestCuda_Reductions_a.o TestCuda_Reductions_b.o TestCuda_Atomics.o TestCuda_Team.o TestCuda_Spaces.o
+	OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o
+ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) # XL: one TestCuda_SubView_c_all object instead of c01..c12
+        OBJ_CUDA += TestCuda_SubView_c_all.o
+else
+	OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o
+	OBJ_CUDA += TestCuda_SubView_c04.o  TestCuda_SubView_c05.o  TestCuda_SubView_c06.o
+	OBJ_CUDA += TestCuda_SubView_c07.o  TestCuda_SubView_c08.o  TestCuda_SubView_c09.o
+	OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o
+endif
+	OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o  TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o
+	OBJ_CUDA += TestCuda_ViewAPI_e.o TestCuda_ViewAPI_f.o  TestCuda_ViewAPI_g.o TestCuda_ViewAPI_h.o
+	OBJ_CUDA += UnitTestMain.o gtest-all.o
 	TARGETS += KokkosCore_UnitTest_Cuda
 	TEST_TARGETS += test-cuda
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
-	OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
+	OBJ_THREADS = TestThreads_Other.o TestThreads_Reductions.o TestThreads_Atomics.o TestThreads_Team.o 
+	OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o 
+	OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o
+	OBJ_THREADS += TestThreads_SubView_c04.o  TestThreads_SubView_c05.o  TestThreads_SubView_c06.o  
+	OBJ_THREADS += TestThreads_SubView_c07.o  TestThreads_SubView_c08.o  TestThreads_SubView_c09.o
+	OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o
+	OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o UnitTestMain.o gtest-all.o 
 	TARGETS += KokkosCore_UnitTest_Threads
 	TEST_TARGETS += test-threads
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
-	OBJ_OPENMP = TestOpenMP_c.o TestOpenMP_b.o TestOpenMP_a.o TestOpenMP.o UnitTestMain.o gtest-all.o
+	OBJ_OPENMP = TestOpenMP_Other.o TestOpenMP_Reductions.o TestOpenMP_Atomics.o TestOpenMP_Team.o
+	OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o
+ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) # XL: one TestOpenMP_SubView_c_all object instead of c01..c12
+        OBJ_OPENMP += TestOpenMP_SubView_c_all.o
+else
+	OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o
+	OBJ_OPENMP += TestOpenMP_SubView_c04.o  TestOpenMP_SubView_c05.o  TestOpenMP_SubView_c06.o
+	OBJ_OPENMP += TestOpenMP_SubView_c07.o  TestOpenMP_SubView_c08.o  TestOpenMP_SubView_c09.o
+	OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o
+endif
+	OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o UnitTestMain.o gtest-all.o
 	TARGETS += KokkosCore_UnitTest_OpenMP
 	TEST_TARGETS += test-openmp
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
-	OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
+	OBJ_SERIAL = TestSerial_Other.o TestSerial_Reductions.o TestSerial_Atomics.o TestSerial_Team.o 
+	OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o 
+ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) # XL: one TestSerial_SubView_c_all object instead of c01..c12
+        OBJ_SERIAL += TestSerial_SubView_c_all.o
+else
+	OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o
+	OBJ_SERIAL += TestSerial_SubView_c04.o  TestSerial_SubView_c05.o  TestSerial_SubView_c06.o  
+	OBJ_SERIAL += TestSerial_SubView_c07.o  TestSerial_SubView_c08.o  TestSerial_SubView_c09.o
+	OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o
+endif
+	OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o UnitTestMain.o gtest-all.o 
 	TARGETS += KokkosCore_UnitTest_Serial
 	TEST_TARGETS += test-serial
 endif
@@ -61,7 +103,7 @@ OBJ_HWLOC = TestHWLOC.o UnitTestMain.o gtest-all.o
 TARGETS += KokkosCore_UnitTest_HWLOC
 TEST_TARGETS += test-hwloc
 
-OBJ_DEFAULT = TestDefaultDeviceType.o TestDefaultDeviceType_a.o UnitTestMain.o gtest-all.o
+OBJ_DEFAULT = TestDefaultDeviceType.o TestDefaultDeviceType_a.o TestDefaultDeviceType_b.o TestDefaultDeviceType_c.o TestDefaultDeviceType_d.o UnitTestMain.o gtest-all.o
 TARGETS += KokkosCore_UnitTest_Default
 TEST_TARGETS += test-default
 
diff --git a/lib/kokkos/core/unit_test/TestAggregate.hpp b/lib/kokkos/core/unit_test/TestAggregate.hpp
index 5388a60787cb9217a4436798d826dcc53f55d3f2..d22837f3ed7b67bccecfbe11ba4d71266a094616 100644
--- a/lib/kokkos/core/unit_test/TestAggregate.hpp
+++ b/lib/kokkos/core/unit_test/TestAggregate.hpp
@@ -52,7 +52,7 @@
 
 /*--------------------------------------------------------------------------*/
 
-#include <impl/KokkosExp_ViewArray.hpp>
+#include <impl/Kokkos_ViewArray.hpp>
 
 namespace Test {
 
diff --git a/lib/kokkos/core/unit_test/TestAggregateReduction.hpp b/lib/kokkos/core/unit_test/TestAggregateReduction.hpp
deleted file mode 100644
index bd05cd347b979e305becead88a898d27b0a7d4f8..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/unit_test/TestAggregateReduction.hpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef TEST_AGGREGATE_REDUCTION_HPP
-#define TEST_AGGREGATE_REDUCTION_HPP
-
-#include <gtest/gtest.h>
-
-#include <stdexcept>
-#include <sstream>
-#include <iostream>
-
-namespace Test {
-
-template< typename T , unsigned N >
-struct StaticArray {
-  T value[N] ;
-
-  KOKKOS_INLINE_FUNCTION
-  StaticArray() = default;
-
-  KOKKOS_INLINE_FUNCTION
-  StaticArray( const StaticArray & rhs ) = default;
-
-  KOKKOS_INLINE_FUNCTION
-  operator T () { return value[0]; }
-
-  KOKKOS_INLINE_FUNCTION
-  StaticArray & operator = ( const T & rhs )
-    {
-      for ( unsigned i = 0 ; i < N ; ++i ) value[i] = rhs ;
-      return *this ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  StaticArray & operator = ( const StaticArray & rhs ) = default;
-
-  KOKKOS_INLINE_FUNCTION
-  StaticArray operator * ( const StaticArray & rhs )
-    {
-      StaticArray tmp ;
-      for ( unsigned i = 0 ; i < N ; ++i ) tmp.value[i] = value[i] * rhs.value[i] ;
-      return tmp ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  StaticArray operator + ( const StaticArray & rhs )
-    {
-      StaticArray tmp ;
-      for ( unsigned i = 0 ; i < N ; ++i ) tmp.value[i] = value[i] + rhs.value[i] ;
-      return tmp ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  StaticArray & operator += ( const StaticArray & rhs )
-    {
-      for ( unsigned i = 0 ; i < N ; ++i ) value[i] += rhs.value[i] ;
-      return *this ;
-    }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator += ( const volatile StaticArray & rhs ) volatile
-    {
-      for ( unsigned i = 0 ; i < N ; ++i ) value[i] += rhs.value[i] ;
-    }
-};
-
-static_assert(std::is_trivial<StaticArray<int, 4>>::value, "Not trivial");
-
-template< typename T , class Space >
-struct DOT {
-  typedef T      value_type ;
-  typedef Space  execution_space ;
-
-  Kokkos::View< value_type * , Space > a ;
-  Kokkos::View< value_type * , Space > b ;
-
-  DOT( const Kokkos::View< value_type * , Space > arg_a
-     , const Kokkos::View< value_type * , Space > arg_b
-     )
-    : a( arg_a ), b( arg_b ) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const int i , value_type & update ) const
-    {
-      update += a(i) * b(i);
-    }
-};
-
-template< typename T , class Space >
-struct FILL {
-  typedef T      value_type ;
-  typedef Space  execution_space ;
-
-  Kokkos::View< value_type * , Space > a ;
-  Kokkos::View< value_type * , Space > b ;
-
-  FILL( const Kokkos::View< value_type * , Space > & arg_a
-      , const Kokkos::View< value_type * , Space > & arg_b
-      )
-    : a( arg_a ), b( arg_b ) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const int i ) const
-    {
-      a(i) = i % 2 ? i + 1 : 1 ;
-      b(i) = i % 2 ? 1 : i + 1 ;
-    }
-};
-
-template< class Space >
-void TestViewAggregateReduction()
-{
-
-#if ! KOKKOS_USING_EXP_VIEW
-
-  const int count = 2 ;
-  const long result = count % 2 ? ( count * ( ( count + 1 ) / 2 ) )
-                                : ( ( count / 2 ) * ( count + 1 ) );
-
-  Kokkos::View< long * , Space > a("a",count);
-  Kokkos::View< long * , Space > b("b",count);
-  Kokkos::View< StaticArray<long,4> * , Space > a4("a4",count);
-  Kokkos::View< StaticArray<long,4> * , Space > b4("b4",count);
-  Kokkos::View< StaticArray<long,10> * , Space > a10("a10",count);
-  Kokkos::View< StaticArray<long,10> * , Space > b10("b10",count);
-
-  Kokkos::parallel_for( count , FILL<long,Space>(a,b) );
-  Kokkos::parallel_for( count , FILL< StaticArray<long,4> , Space >(a4,b4) );
-  Kokkos::parallel_for( count , FILL< StaticArray<long,10> , Space >(a10,b10) );
-
-  long r = 0;
-  StaticArray<long,4> r4 ;
-  StaticArray<long,10> r10 ;
-
-  Kokkos::parallel_reduce( count , DOT<long,Space>(a,b) , r );
-  Kokkos::parallel_reduce( count , DOT< StaticArray<long,4> , Space >(a4,b4) , r4 );
-  Kokkos::parallel_reduce( count , DOT< StaticArray<long,10> , Space >(a10,b10) , r10 );
-
-  ASSERT_EQ( result , r );
-  for ( int i = 0 ; i < 10 ; ++i ) { ASSERT_EQ( result , r10.value[i] ); }
-  for ( int i = 0 ; i < 4 ; ++i ) { ASSERT_EQ( result , r4.value[i] ); }
-
-#endif
-
-}
-
-}
-
-#endif /* #ifndef TEST_AGGREGATE_REDUCTION_HPP */
-
diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp
index aee4bda06cea276e12fca664a48c81a428445bcd..7f1519045187c535c586659e757eeb24609ccb50 100644
--- a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp
+++ b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp
@@ -226,6 +226,148 @@ bool MinAtomicTest(T i0, T i1)
   return passed ;
 }
 
+//---------------------------------------------------
+//--------------atomic_increment---------------------
+//---------------------------------------------------
+
+template<class T,class DEVICE_TYPE>
+struct IncFunctor{ // functor whose call operator applies Kokkos::atomic_increment to data()
+  typedef DEVICE_TYPE execution_space;
+  typedef Kokkos::View<T,execution_space> type;
+  type data; // not set by the constructor; IncAtomic assigns it before launching
+  T i0; // copied from the constructor argument; operator() does not read it
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int) const { // the iteration index is unused
+    Kokkos::atomic_increment(&data());
+  }
+  IncFunctor( T _i0 ) : i0(_i0) {}
+};
+
+template<class T, class execution_space >
+T IncAtomic(T i0) { // runs InitFunctor then IncFunctor once each on 'data' and returns the value read back
+  struct InitFunctor<T,execution_space> f_init(i0); // InitFunctor is declared outside this hunk
+  typename InitFunctor<T,execution_space>::type data("Data");
+  typename InitFunctor<T,execution_space>::h_type h_data("HData");
+  f_init.data = data;
+  Kokkos::parallel_for(1,f_init); // presumably stores i0 into data() - confirm against InitFunctor
+  execution_space::fence(); // fence between the init launch and the increment launch
+
+  struct IncFunctor<T,execution_space> f(i0);
+  f.data = data; // same 'data' that was handed to f_init above
+  Kokkos::parallel_for(1,f); // range of 1, so IncFunctor::operator() is invoked once
+  execution_space::fence();
+
+  Kokkos::deep_copy(h_data,data); // the result is read through h_data, not data
+  T val = h_data();
+  return val;
+}
+
+template<class T>
+T IncAtomicCheck(T i0) { // reference value for IncAtomicTest: i0 + 1 stored into a T, no Kokkos calls
+  T* data = new T[1];
+  data[0] = 0; // overwritten by the assignment below
+
+  *data = i0 + 1;
+
+  T val = *data;
+  delete [] data; // releases the one-element array allocated above
+  return val;
+}
+
+template<class T,class DeviceType>
+bool IncAtomicTest(T i0) // returns true when IncAtomic and IncAtomicCheck agree for i0
+{
+  T res       = IncAtomic<T,DeviceType>(i0);
+  T resSerial = IncAtomicCheck<T>(i0);
+
+  bool passed = true;
+
+  if ( resSerial != res ) { // mismatch: report both values on std::cout
+    passed = false;
+
+    std::cout << "Loop<"
+              << typeid(T).name()
+              << ">( test = IncAtomicTest"
+              << " FAILED : "
+              << resSerial << " != " << res
+              << std::endl ;
+  }
+
+  return passed ;
+}
+
+//---------------------------------------------------
+//--------------atomic_decrement---------------------
+//---------------------------------------------------
+
+template<class T,class DEVICE_TYPE>
+struct DecFunctor{ // functor whose call operator applies Kokkos::atomic_decrement to data()
+  typedef DEVICE_TYPE execution_space;
+  typedef Kokkos::View<T,execution_space> type;
+  type data; // not set by the constructor; DecAtomic assigns it before launching
+  T i0; // copied from the constructor argument; operator() does not read it
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int) const { // the iteration index is unused
+    Kokkos::atomic_decrement(&data());
+  }
+  DecFunctor( T _i0 ) : i0(_i0) {}
+};
+
+template<class T, class execution_space >
+T DecAtomic(T i0) { // runs InitFunctor then DecFunctor once each on 'data' and returns the value read back
+  struct InitFunctor<T,execution_space> f_init(i0); // InitFunctor is declared outside this hunk
+  typename InitFunctor<T,execution_space>::type data("Data");
+  typename InitFunctor<T,execution_space>::h_type h_data("HData");
+  f_init.data = data;
+  Kokkos::parallel_for(1,f_init); // presumably stores i0 into data() - confirm against InitFunctor
+  execution_space::fence(); // fence between the init launch and the decrement launch
+
+  struct DecFunctor<T,execution_space> f(i0);
+  f.data = data; // same 'data' that was handed to f_init above
+  Kokkos::parallel_for(1,f); // range of 1, so DecFunctor::operator() is invoked once
+  execution_space::fence();
+
+  Kokkos::deep_copy(h_data,data); // the result is read through h_data, not data
+  T val = h_data();
+  return val;
+}
+
+template<class T>
+T DecAtomicCheck(T i0) { // reference value for DecAtomicTest: i0 - 1 stored into a T, no Kokkos calls
+  T* data = new T[1];
+  data[0] = 0; // overwritten by the assignment below
+
+  *data = i0 - 1;
+
+  T val = *data;
+  delete [] data; // releases the one-element array allocated above
+  return val;
+}
+
+template<class T,class DeviceType>
+bool DecAtomicTest(T i0) // returns true when DecAtomic and DecAtomicCheck agree for i0
+{
+  T res       = DecAtomic<T,DeviceType>(i0);
+  T resSerial = DecAtomicCheck<T>(i0);
+
+  bool passed = true;
+
+  if ( resSerial != res ) { // mismatch: report both values on std::cout
+    passed = false;
+
+    std::cout << "Loop<"
+              << typeid(T).name()
+              << ">( test = DecAtomicTest"
+              << " FAILED : "
+              << resSerial << " != " << res
+              << std::endl ;
+  }
+
+  return passed ;
+}
+
 //---------------------------------------------------
 //--------------atomic_fetch_mul---------------------
 //---------------------------------------------------
@@ -821,6 +963,8 @@ bool AtomicOperationsTestIntegralType( int i0 , int i1 , int test )
     case 8: return XorAtomicTest<T,DeviceType>( (T)i0 , (T)i1 );
     case 9: return LShiftAtomicTest<T,DeviceType>( (T)i0 , (T)i1 );
     case 10: return RShiftAtomicTest<T,DeviceType>( (T)i0 , (T)i1 );
+    case 11: return IncAtomicTest<T,DeviceType>( (T)i0 );
+    case 12: return DecAtomicTest<T,DeviceType>( (T)i0 );
   }
   return 0;
 }
diff --git a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp
index dfa2250c04ae8cc785383b1f64a127ad40279f57..71c22144896627cb8886a716acd214830fa53af0 100644
--- a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp
+++ b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp
@@ -70,8 +70,10 @@ struct AddFunctor {
 #ifdef KOKKOS_HAVE_PRAGMA_LOOPCOUNT
     #pragma loop count(128)
 #endif
+#ifndef KOKKOS_HAVE_DEBUG
 #ifdef KOKKOS_HAVE_PRAGMA_SIMD
     #pragma simd
+#endif
 #endif
     for(int j=0;j<length;j++)
       a(i,j) += b(i,j);
diff --git a/lib/kokkos/core/unit_test/TestCuda.cpp b/lib/kokkos/core/unit_test/TestCuda.cpp
deleted file mode 100644
index e6155662525f08fd718e02a40243e942dd77104d..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/unit_test/TestCuda.cpp
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-//
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-//
-// ************************************************************************
-//@HEADER
-*/
-
-#include <gtest/gtest.h>
-
-#include <iostream>
-
-#include <Kokkos_Core.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp>
-#include <impl/Kokkos_ViewTileLeft.hpp>
-#include <TestTile.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestViewImpl.hpp>
-#include <TestAtomic.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestMemoryPool.hpp>
-#include <TestTeamVector.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-#include <TestCXX11Deduction.hpp>
-
-#include <TestTaskPolicy.hpp>
-#include <TestPolicyConstruction.hpp>
-
-#include <TestMDRange.hpp>
-
-//----------------------------------------------------------------------------
-
-class cuda : public ::testing::Test {
-protected:
-  static void SetUpTestCase();
-  static void TearDownTestCase();
-};
-
-void cuda::SetUpTestCase()
-  {
-    Kokkos::Cuda::print_configuration( std::cout );
-    Kokkos::HostSpace::execution_space::initialize();
-    Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
-  }
-
-void cuda::TearDownTestCase()
-  {
-    Kokkos::Cuda::finalize();
-    Kokkos::HostSpace::execution_space::finalize();
-  }
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Test {
-
-__global__
-void test_abort()
-{
-  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
-    Kokkos::CudaSpace ,
-    Kokkos::HostSpace >::verify();
-}
-
-__global__
-void test_cuda_spaces_int_value( int * ptr )
-{
-  if ( *ptr == 42 ) { *ptr = 2 * 42 ; }
-}
-
-TEST_F( cuda , md_range ) {
-  TestMDRange_2D< Kokkos::Cuda >::test_for2(100,100);
-
-  TestMDRange_3D< Kokkos::Cuda >::test_for3(100,100,100);
-}
-
-TEST_F( cuda , compiler_macros )
-{
-  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Cuda >() ) );
-}
-
-TEST_F( cuda , memory_space )
-{
-  TestMemorySpace< Kokkos::Cuda >();
-}
-
-TEST_F( cuda, uvm )
-{
-  if ( Kokkos::CudaUVMSpace::available() ) {
-
-    int * uvm_ptr = (int*) Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >("uvm_ptr",sizeof(int));
-
-    *uvm_ptr = 42 ;
-
-    Kokkos::Cuda::fence();
-    test_cuda_spaces_int_value<<<1,1>>>(uvm_ptr);
-    Kokkos::Cuda::fence();
-
-    EXPECT_EQ( *uvm_ptr, int(2*42) );
-
-    Kokkos::kokkos_free< Kokkos::CudaUVMSpace >(uvm_ptr );
-  }
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( cuda , impl_shared_alloc )
-{
-  test_shared_alloc< Kokkos::CudaSpace , Kokkos::HostSpace::execution_space >();
-  test_shared_alloc< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >();
-  test_shared_alloc< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >();
-}
-
-TEST_F( cuda, policy_construction) {
-  TestRangePolicyConstruction< Kokkos::Cuda >();
-  TestTeamPolicyConstruction< Kokkos::Cuda >();
-}
-
-TEST_F( cuda , impl_view_mapping )
-{
-  test_view_mapping< Kokkos::Cuda >();
-  test_view_mapping< Kokkos::CudaUVMSpace >();
-  test_view_mapping_subview< Kokkos::Cuda >();
-  test_view_mapping_subview< Kokkos::CudaUVMSpace >();
-  test_view_mapping_operator< Kokkos::Cuda >();
-  test_view_mapping_operator< Kokkos::CudaUVMSpace >();
-  TestViewMappingAtomic< Kokkos::Cuda >::run();
-}
-
-TEST_F( cuda , view_of_class )
-{
-  TestViewMappingClassValue< Kokkos::CudaSpace >::run();
-  TestViewMappingClassValue< Kokkos::CudaUVMSpace >::run();
-}
-
-template< class MemSpace >
-struct TestViewCudaTexture {
-
-  enum { N = 1000 };
-
-  using V = Kokkos::Experimental::View<double*,MemSpace> ;
-  using T = Kokkos::Experimental::View<const double*, MemSpace, Kokkos::MemoryRandomAccess > ;
-
-  V m_base ;
-  T m_tex ;
-
-  struct TagInit {};
-  struct TagTest {};
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const TagTest & , const int i , long & error_count ) const
-    { if ( m_tex[i] != i + 1 ) ++error_count ; }
-
-  TestViewCudaTexture()
-    : m_base("base",N)
-    , m_tex( m_base )
-    {}
-
-  static void run()
-    {
-      EXPECT_TRUE( ( std::is_same< typename V::reference_type
-                                 , double &
-                                 >::value ) );
-
-      EXPECT_TRUE( ( std::is_same< typename T::reference_type
-                                 , const double
-                                 >::value ) );
-
-      EXPECT_TRUE(  V::reference_type_is_lvalue_reference ); // An ordinary view
-      EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value
-
-      TestViewCudaTexture self ;
-      Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda , TagInit >(0,N) , self );
-      long error_count = -1 ;
-      Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda , TagTest >(0,N) , self , error_count );
-      EXPECT_EQ( error_count , 0 );
-    }
-};
-
-TEST_F( cuda , impl_view_texture )
-{
-  TestViewCudaTexture< Kokkos::CudaSpace >::run();
-  TestViewCudaTexture< Kokkos::CudaUVMSpace >::run();
-}
-
-template< class MemSpace , class ExecSpace >
-struct TestViewCudaAccessible {
-
-  enum { N = 1000 };
-
-  using V = Kokkos::Experimental::View<double*,MemSpace> ;
-
-  V m_base ;
-
-  struct TagInit {};
-  struct TagTest {};
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const TagTest & , const int i , long & error_count ) const
-    { if ( m_base[i] != i + 1 ) ++error_count ; }
-
-  TestViewCudaAccessible()
-    : m_base("base",N)
-    {}
-
-  static void run()
-    {
-      TestViewCudaAccessible self ;
-      Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space , TagInit >(0,N) , self );
-      MemSpace::execution_space::fence();
-      // Next access is a different execution space, must complete prior kernel.
-      long error_count = -1 ;
-      Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagTest >(0,N) , self , error_count );
-      EXPECT_EQ( error_count , 0 );
-    }
-};
-
-TEST_F( cuda , impl_view_accessible )
-{
-  TestViewCudaAccessible< Kokkos::CudaSpace , Kokkos::Cuda >::run();
-
-  TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::Cuda >::run();
-  TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >::run();
-
-  TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::Cuda >::run();
-  TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >::run();
-}
-
-}
diff --git a/lib/kokkos/core/unit_test/TestCuda_a.cpp b/lib/kokkos/core/unit_test/TestCuda_a.cpp
deleted file mode 100644
index 4680c333867ff0e68f572121a654f8f23d09fcfb..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/unit_test/TestCuda_a.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-//
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-//
-// ************************************************************************
-//@HEADER
-*/
-
-#include <gtest/gtest.h>
-
-#include <iostream>
-
-#include <Kokkos_Core.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp>
-#include <impl/Kokkos_ViewTileLeft.hpp>
-#include <TestTile.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestViewImpl.hpp>
-#include <TestAtomic.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestMemoryPool.hpp>
-#include <TestTeamVector.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-#include <TestCXX11Deduction.hpp>
-
-#include <TestTaskPolicy.hpp>
-#include <TestPolicyConstruction.hpp>
-
-//----------------------------------------------------------------------------
-
-class cuda : public ::testing::Test {
-protected:
-  static void SetUpTestCase();
-  static void TearDownTestCase();
-};
-
-//----------------------------------------------------------------------------
-
-namespace Test {
-
-TEST_F( cuda, view_impl )
-{
-  // test_abort<<<32,32>>>(); // Aborts the kernel with CUDA version 4.1 or greater
-
-  test_view_impl< Kokkos::Cuda >();
-}
-
-TEST_F( cuda, view_api )
-{
-  typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess > > view_texture_managed ;
-  typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess | Kokkos::Unmanaged > > view_texture_unmanaged ;
-
-  TestViewAPI< double , Kokkos::Cuda >();
-  TestViewAPI< double , Kokkos::CudaUVMSpace >();
-
-#if 0
-  Kokkos::View<double, Kokkos::Cuda > x("x");
-  Kokkos::View<double[1], Kokkos::Cuda > y("y");
-  // *x = 10 ;
-  // x() = 10 ;
-  // y[0] = 10 ;
-  // y(0) = 10 ;
-#endif
-}
-
-TEST_F( cuda , view_nested_view )
-{
-  ::Test::view_nested_view< Kokkos::Cuda >();
-}
-
-TEST_F( cuda, view_subview_auto_1d_left ) {
-  TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Cuda >();
-}
-
-TEST_F( cuda, view_subview_auto_1d_right ) {
-  TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Cuda >();
-}
-
-TEST_F( cuda, view_subview_auto_1d_stride ) {
-  TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Cuda >();
-}
-
-TEST_F( cuda, view_subview_assign_strided ) {
-  TestViewSubview::test_1d_strided_assignment< Kokkos::Cuda >();
-}
-
-TEST_F( cuda, view_subview_left_0 ) {
-  TestViewSubview::test_left_0< Kokkos::CudaUVMSpace >();
-}
-
-TEST_F( cuda, view_subview_left_1 ) {
-  TestViewSubview::test_left_1< Kokkos::CudaUVMSpace >();
-}
-
-TEST_F( cuda, view_subview_left_2 ) {
-  TestViewSubview::test_left_2< Kokkos::CudaUVMSpace >();
-}
-
-TEST_F( cuda, view_subview_left_3 ) {
-  TestViewSubview::test_left_3< Kokkos::CudaUVMSpace >();
-}
-
-TEST_F( cuda, view_subview_right_0 ) {
-  TestViewSubview::test_right_0< Kokkos::CudaUVMSpace >();
-}
-
-TEST_F( cuda, view_subview_right_1 ) {
-  TestViewSubview::test_right_1< Kokkos::CudaUVMSpace >();
-}
-
-TEST_F( cuda, view_subview_right_3 ) {
-  TestViewSubview::test_right_3< Kokkos::CudaUVMSpace >();
-}
-
-TEST_F( cuda, view_subview_1d_assign ) {
-  TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace >();
-}
-
-TEST_F( cuda, view_subview_2d_from_3d ) {
-  TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace >();
-}
-
-TEST_F( cuda, view_subview_2d_from_5d ) {
-  TestViewSubview::test_2d_subview_5d< Kokkos::CudaUVMSpace >();
-}
-
-}
diff --git a/lib/kokkos/core/unit_test/TestCuda_b.cpp b/lib/kokkos/core/unit_test/TestCuda_b.cpp
deleted file mode 100644
index d4ca949e57cb02d15444ec7f3e48b123003b6a68..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/unit_test/TestCuda_b.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-//
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-//
-// ************************************************************************
-//@HEADER
-*/
-
-#include <gtest/gtest.h>
-
-#include <iostream>
-
-#include <Kokkos_Core.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp>
-#include <impl/Kokkos_ViewTileLeft.hpp>
-#include <TestTile.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestViewImpl.hpp>
-#include <TestAtomic.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestMemoryPool.hpp>
-#include <TestTeamVector.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-#include <TestCXX11Deduction.hpp>
-
-#include <TestTaskPolicy.hpp>
-#include <TestPolicyConstruction.hpp>
-
-//----------------------------------------------------------------------------
-
-class cuda : public ::testing::Test {
-protected:
-  static void SetUpTestCase();
-  static void TearDownTestCase();
-};
-
-//----------------------------------------------------------------------------
-
-namespace Test {
-
-TEST_F( cuda, range_tag )
-{
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(3);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(3);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(3);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001);
-  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001);
-  //TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000);
-}
-
-TEST_F( cuda, team_tag )
-{
-  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(3);
-  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(3);
-  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3);
-  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3);
-  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
-  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
-  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000);
-  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000);
-}
-
-TEST_F( cuda, reduce )
-{
-  TestReduce< long ,   Kokkos::Cuda >( 10000000 );
-  TestReduce< double , Kokkos::Cuda >( 1000000 );
-  TestReduce< int , Kokkos::Cuda >( 0 );
-}
-
-TEST_F( cuda , reducers )
-{
-  TestReducers<int, Kokkos::Cuda>::execute_integer();
-  TestReducers<size_t, Kokkos::Cuda>::execute_integer();
-  TestReducers<double, Kokkos::Cuda>::execute_float();
-  TestReducers<Kokkos::complex<double>, Kokkos::Cuda>::execute_basic();
-}
-
-TEST_F( cuda, reduce_team )
-{
-  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 );
-  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
-  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 );
-  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
-  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 );
-  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
-  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 );
-  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
-}
-
-TEST_F( cuda, shared_team )
-{
-  TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >();
-  TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >();
-}
-
-#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
-TEST_F( cuda, lambda_shared_team )
-{
-  TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >();
-  TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >();
-  TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static>  >();
-  TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >();
-  TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >();
-  TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic>  >();
-}
-#endif
-
-TEST_F( cuda, shmem_size) {
-  TestShmemSize< Kokkos::Cuda >();
-}
-
-TEST_F( cuda, multi_level_scratch) {
-  TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >();
-  TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >();
-}
-
-TEST_F( cuda, reduce_dynamic )
-{
-  TestReduceDynamic< long ,   Kokkos::Cuda >( 10000000 );
-  TestReduceDynamic< double , Kokkos::Cuda >( 1000000 );
-}
-
-TEST_F( cuda, reduce_dynamic_view )
-{
-  TestReduceDynamicView< long ,   Kokkos::Cuda >( 10000000 );
-  TestReduceDynamicView< double , Kokkos::Cuda >( 1000000 );
-}
-
-}
diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp
index 1b1e0e67365fa28778cb848cbd52d0a2399c97e6..87a534f11dfc29913abb5f36eeba63da6d817c2e 100644
--- a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp
+++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
@@ -48,7 +48,6 @@
 #if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__)
 //----------------------------------------------------------------------------
 
-#include <TestViewImpl.hpp>
 #include <TestAtomic.hpp>
 
 #include <TestViewAPI.hpp>
@@ -60,6 +59,7 @@
 #include <TestCompilerMacros.hpp>
 #include <TestCXX11.hpp>
 #include <TestTeamVector.hpp>
+#include <TestUtilities.hpp>
 
 namespace Test {
 
@@ -76,165 +76,24 @@ protected:
   }
 };
 
-
-TEST_F( defaultdevicetype, view_impl) {
-  test_view_impl< Kokkos::DefaultExecutionSpace >();
-}
-
-TEST_F( defaultdevicetype, view_api) {
-  TestViewAPI< double , Kokkos::DefaultExecutionSpace >();
-}
-
-TEST_F( defaultdevicetype, long_reduce) {
-  TestReduce< long ,   Kokkos::DefaultExecutionSpace >( 100000 );
-}
-
-TEST_F( defaultdevicetype, double_reduce) {
-  TestReduce< double ,   Kokkos::DefaultExecutionSpace >( 100000 );
-}
-
-TEST_F( defaultdevicetype, long_reduce_dynamic ) {
-  TestReduceDynamic< long ,   Kokkos::DefaultExecutionSpace >( 100000 );
-}
-
-TEST_F( defaultdevicetype, double_reduce_dynamic ) {
-  TestReduceDynamic< double ,   Kokkos::DefaultExecutionSpace >( 100000 );
-}
-
-TEST_F( defaultdevicetype, long_reduce_dynamic_view ) {
-  TestReduceDynamicView< long ,   Kokkos::DefaultExecutionSpace >( 100000 );
-}
-
-
-TEST_F( defaultdevicetype , atomics )
+TEST_F( defaultdevicetype, host_space_access )
 {
-  const int loop_count = 1e4 ;
+  typedef Kokkos::HostSpace::execution_space host_exec_space ;
+  typedef Kokkos::Device< host_exec_space , Kokkos::HostSpace > device_space ;
+  typedef Kokkos::Impl::HostMirror< Kokkos::DefaultExecutionSpace >::Space mirror_space ;
 
-  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< host_exec_space , Kokkos::HostSpace >::accessible , "" );
 
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< device_space , Kokkos::HostSpace >::accessible , "" );
 
-  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,3) ) );
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< mirror_space , Kokkos::HostSpace >::accessible , "" );
 }
 
-/*TEST_F( defaultdevicetype , view_remap )
-{
-  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
-
-  typedef Kokkos::View< double*[N1][N2][N3] ,
-                             Kokkos::LayoutRight ,
-                             Kokkos::DefaultExecutionSpace > output_type ;
-
-  typedef Kokkos::View< int**[N2][N3] ,
-                             Kokkos::LayoutLeft ,
-                             Kokkos::DefaultExecutionSpace > input_type ;
-
-  typedef Kokkos::View< int*[N0][N2][N3] ,
-                             Kokkos::LayoutLeft ,
-                             Kokkos::DefaultExecutionSpace > diff_type ;
-
-  output_type output( "output" , N0 );
-  input_type  input ( "input" , N0 , N1 );
-  diff_type   diff  ( "diff" , N0 );
-
-  int value = 0 ;
-  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
-  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
-  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
-  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
-    input(i0,i1,i2,i3) = ++value ;
-  }}}}
-
-  // Kokkos::deep_copy( diff , input ); // throw with incompatible shape
-  Kokkos::deep_copy( output , input );
-
-  value = 0 ;
-  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
-  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
-  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
-  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
-    ++value ;
-    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
-  }}}}
-}*/
-
-//----------------------------------------------------------------------------
-
-
-TEST_F( defaultdevicetype , view_aggregate )
-{
-  TestViewAggregate< Kokkos::DefaultExecutionSpace >();
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( defaultdevicetype , scan )
-{
-  TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1 , 1000 );
-  TestScan< Kokkos::DefaultExecutionSpace >( 1000000 );
-  TestScan< Kokkos::DefaultExecutionSpace >( 10000000 );
-  Kokkos::DefaultExecutionSpace::fence();
-}
-
-
-//----------------------------------------------------------------------------
-
-TEST_F( defaultdevicetype , compiler_macros )
-{
-  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::DefaultExecutionSpace >() ) );
-}
-
-
-//----------------------------------------------------------------------------
-TEST_F( defaultdevicetype , cxx11 )
-{
-  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(1) ) );
-  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(2) ) );
-  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(3) ) );
-  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(4) ) );
-}
-
-TEST_F( defaultdevicetype , team_vector )
-{
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(0) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(1) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(2) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(3) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(4) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(5) ) );
-}
-
-TEST_F( defaultdevicetype , malloc )
-{
-  int* data = (int*) Kokkos::kokkos_malloc(100*sizeof(int));
-  ASSERT_NO_THROW(data = (int*) Kokkos::kokkos_realloc(data,120*sizeof(int)));
-  Kokkos::kokkos_free(data);
-
-  int* data2 = (int*) Kokkos::kokkos_malloc(0);
-  ASSERT_TRUE(data2==NULL);
-  Kokkos::kokkos_free(data2);
+TEST_F( defaultdevicetype, view_api) {
+  TestViewAPI< double , Kokkos::DefaultExecutionSpace >();
 }
 
 } // namespace test
diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp
index a17ed97a9ff4130a2ca2ea087b400e9595c69dd9..caeb56c9e179416ec23a8d17582fa013e0896e0b 100644
--- a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp
+++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp
@@ -84,8 +84,8 @@ namespace Impl {
       }
 
 #ifdef KOKKOS_HAVE_SERIAL
-      if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value ||
-         Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) {
+      if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value ||
+         std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) {
         nthreads = 1;
       }
 #endif
@@ -98,8 +98,8 @@ namespace Impl {
       if(Kokkos::hwloc::available())
         numa = Kokkos::hwloc::get_available_numa_count();
 #ifdef KOKKOS_HAVE_SERIAL
-      if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value ||
-         Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) {
+      if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value ||
+         std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) {
         numa = 1;
       }
 #endif
@@ -142,8 +142,8 @@ namespace Impl {
                        * Kokkos::hwloc::get_available_numa_count();
       }
 #ifdef KOKKOS_HAVE_SERIAL
-      if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value ||
-         Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) {
+      if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value ||
+         std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) {
         nthreads = 1;
       }
 #endif
@@ -156,8 +156,8 @@ namespace Impl {
       if(Kokkos::hwloc::available())
         numa = Kokkos::hwloc::get_available_numa_count();
 #ifdef KOKKOS_HAVE_SERIAL
-      if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value ||
-         Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) {
+      if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value ||
+         std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) {
         numa = 1;
       }
 #endif
@@ -184,7 +184,7 @@ namespace Impl {
                           * Kokkos::hwloc::get_available_threads_per_core();
       } else {
         #ifdef KOKKOS_HAVE_OPENMP
-        if(Kokkos::Impl::is_same<Kokkos::HostSpace::execution_space,Kokkos::OpenMP>::value) {
+        if(std::is_same<Kokkos::HostSpace::execution_space,Kokkos::OpenMP>::value) {
           expected_nthreads = omp_get_max_threads();
         } else
         #endif
@@ -192,8 +192,8 @@ namespace Impl {
 
       }
       #ifdef KOKKOS_HAVE_SERIAL
-      if(Kokkos::Impl::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value ||
-         Kokkos::Impl::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) 
+      if(std::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value ||
+         std::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) 
         expected_nthreads = 1;
       #endif
     }
@@ -206,15 +206,15 @@ namespace Impl {
         expected_numa = 1;
       }
       #ifdef KOKKOS_HAVE_SERIAL
-      if(Kokkos::Impl::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value ||
-         Kokkos::Impl::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value )
+      if(std::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value ||
+         std::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value )
         expected_numa = 1;
       #endif
     }
     ASSERT_EQ(Kokkos::HostSpace::execution_space::thread_pool_size(),expected_nthreads);
 
 #ifdef KOKKOS_HAVE_CUDA
-    if(Kokkos::Impl::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Cuda>::value) {
+    if(std::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Cuda>::value) {
       int device;
       cudaGetDevice( &device );
       int expected_device = argstruct.device_id;
diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp
index c15f81223329eaa749d84fbef28340638fd3c835..185c1b791800867f9e9c2113dce40a714c3ac60a 100644
--- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp
+++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp
@@ -67,8 +67,8 @@ protected:
 };
 
 
-TEST_F( defaultdevicetype, reduce_instantiation) {
-  TestReduceCombinatoricalInstantiation<>::execute();
+TEST_F( defaultdevicetype, reduce_instantiation_a) {
+  TestReduceCombinatoricalInstantiation<>::execute_a();
 }
 
 } // namespace test
diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp
similarity index 78%
rename from lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp
rename to lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp
index be0134460b279f0cbb5f0bc1efda36863c0342ca..9aa5401871c5f0c1208c83a2370e958f4e7e1115 100644
--- a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp
+++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp
@@ -41,35 +41,36 @@
 //@HEADER
 */
 
-#ifndef KOKKOS_HBW_ALLOCATORS_HPP
-#define KOKKOS_HBW_ALLOCATORS_HPP
+#include <gtest/gtest.h>
 
-#ifdef KOKKOS_HAVE_HBWSPACE
+#include <Kokkos_Core.hpp>
 
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
+#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__)
+//----------------------------------------------------------------------------
+
+#include <TestReduce.hpp>
 
-/// class MallocAllocator
-class HBWMallocAllocator
-{
-public:
-  static const char * name()
-  {
-    return "HBW Malloc Allocator";
-  }
 
-  static void* allocate(size_t size);
+namespace Test {
 
-  static void deallocate(void * ptr, size_t size);
+class defaultdevicetype : public ::testing::Test {
+protected:
+  static void SetUpTestCase()
+  {
+    Kokkos::initialize();
+  }
 
-  static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
+  static void TearDownTestCase()
+  {
+    Kokkos::finalize();
+  }
 };
 
+
+TEST_F( defaultdevicetype, reduce_instantiation_b) {
+  TestReduceCombinatoricalInstantiation<>::execute_b();
 }
-}
-} // namespace Kokkos::Impl
-#endif //KOKKOS_HAVE_HBWSPACE
-#endif //KOKKOS_HBW_ALLOCATORS_HPP
 
+} // namespace test
 
+#endif
diff --git a/lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp
similarity index 67%
rename from lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp
rename to lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp
index 575f2f2c254ecae81132c8e5f714e4fe6e71c14f..585658909225e25ea1f74646626923ffe2150920 100644
--- a/lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp
+++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp
@@ -43,58 +43,34 @@
 
 #include <gtest/gtest.h>
 
-#include <iostream>
 #include <Kokkos_Core.hpp>
 
-/*--------------------------------------------------------------------------*/
+#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__)
+//----------------------------------------------------------------------------
 
-namespace {
+#include <TestReduce.hpp>
 
-template<class Arg1>
-class TestMemorySpace {
-public:
 
-  typedef typename Arg1::memory_space MemorySpace;
-  TestMemorySpace() { run_test(); }
+namespace Test {
 
-  void run_test()
+class defaultdevicetype : public ::testing::Test {
+protected:
+  static void SetUpTestCase()
   {
+    Kokkos::initialize();
+  }
 
-#if ! KOKKOS_USING_EXP_VIEW
-
-    Kokkos::View<int* ,Arg1> invalid;
-    ASSERT_EQ(0u, invalid.tracker().ref_count() );
-
-    {
-      Kokkos::View<int* ,Arg1> a("A",10);
-
-      ASSERT_EQ(1u, a.tracker().ref_count() );
-
-      {
-        Kokkos::View<int* ,Arg1> b = a;
-        ASSERT_EQ(2u, b.tracker().ref_count() );
-
-        Kokkos::View<int* ,Arg1> D("D",10);
-        ASSERT_EQ(1u, D.tracker().ref_count() );
-
-        {
-          Kokkos::View<int* ,Arg1> E("E",10);
-          ASSERT_EQ(1u, E.tracker().ref_count() );
-        }
-
-        ASSERT_EQ(2u, b.tracker().ref_count() );
-      }
-      ASSERT_EQ(1u, a.tracker().ref_count() );
-    }
-
-#endif
-
+  static void TearDownTestCase()
+  {
+    Kokkos::finalize();
   }
 };
 
-}
-
-/*--------------------------------------------------------------------------*/
 
+TEST_F( defaultdevicetype, reduce_instantiation_c) {
+  TestReduceCombinatoricalInstantiation<>::execute_c();
+}
 
+} // namespace test
 
+#endif
diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2659b5c380f8ea79ba99c93b09104c70652dfae9
--- /dev/null
+++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp
@@ -0,0 +1,237 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+
+#include <Kokkos_Core.hpp>
+
+#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__)
+//----------------------------------------------------------------------------
+
+#include <TestAtomic.hpp>
+
+#include <TestViewAPI.hpp>
+
+#include <TestReduce.hpp>
+#include <TestScan.hpp>
+#include <TestTeam.hpp>
+#include <TestAggregate.hpp>
+#include <TestCompilerMacros.hpp>
+#include <TestCXX11.hpp>
+#include <TestTeamVector.hpp>
+#include <TestUtilities.hpp>
+
+namespace Test {
+
+class defaultdevicetype : public ::testing::Test {  // googletest fixture used by every TEST_F in this file
+protected:
+  static void SetUpTestCase()  // static googletest hook: brings Kokkos up via Kokkos::initialize()
+  {
+    Kokkos::initialize();
+  }
+
+  static void TearDownTestCase()  // static googletest hook: shuts Kokkos down via Kokkos::finalize()
+  {
+    Kokkos::finalize();
+  }
+};
+
+TEST_F( defaultdevicetype, test_utilities) {  // delegates to test_utilities(), presumably declared in TestUtilities.hpp
+  test_utilities();
+}
+
+TEST_F( defaultdevicetype, long_reduce) {  // TestReduce with long on the default execution space
+  TestReduce< long ,   Kokkos::DefaultExecutionSpace >( 100000 );
+}
+
+TEST_F( defaultdevicetype, double_reduce) {  // same argument (100000), double scalar
+  TestReduce< double ,   Kokkos::DefaultExecutionSpace >( 100000 );
+}
+
+TEST_F( defaultdevicetype, long_reduce_dynamic ) {  // TestReduceDynamic variant, long scalar
+  TestReduceDynamic< long ,   Kokkos::DefaultExecutionSpace >( 100000 );
+}
+
+TEST_F( defaultdevicetype, double_reduce_dynamic ) {  // TestReduceDynamic variant, double scalar
+  TestReduceDynamic< double ,   Kokkos::DefaultExecutionSpace >( 100000 );
+}
+
+TEST_F( defaultdevicetype, long_reduce_dynamic_view ) {  // TestReduceDynamicView; only the long variant is run in this file
+  TestReduceDynamicView< long ,   Kokkos::DefaultExecutionSpace >( 100000 );
+}
+
+
+TEST_F( defaultdevicetype , atomics )  // runs TestAtomic::Loop for each scalar type on the default execution space
+{
+  const int loop_count = 10000 ;  // integer literal (was the double literal 1e4); first argument of every non-float Loop call
+
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );  // 2nd argument (1, 2, 3) picks a case inside TestAtomic.hpp - not visible here
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,1) ) );  // float uses a fixed count of 100 instead of loop_count
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,3) ) );
+}
+
+/*TEST_F( defaultdevicetype , view_remap )
+{
+  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
+
+  typedef Kokkos::View< double*[N1][N2][N3] ,
+                             Kokkos::LayoutRight ,
+                             Kokkos::DefaultExecutionSpace > output_type ;
+
+  typedef Kokkos::View< int**[N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::DefaultExecutionSpace > input_type ;
+
+  typedef Kokkos::View< int*[N0][N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::DefaultExecutionSpace > diff_type ;
+
+  output_type output( "output" , N0 );
+  input_type  input ( "input" , N0 , N1 );
+  diff_type   diff  ( "diff" , N0 );
+
+  int value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    input(i0,i1,i2,i3) = ++value ;
+  }}}}
+
+  // Kokkos::deep_copy( diff , input ); // throw with incompatible shape
+  Kokkos::deep_copy( output , input );
+
+  value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    ++value ;
+    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
+  }}}}
+}*/
+
+//----------------------------------------------------------------------------
+
+
+TEST_F( defaultdevicetype , view_aggregate )  // instantiates TestViewAggregate for the default execution space
+{
+  TestViewAggregate< Kokkos::DefaultExecutionSpace >();
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( defaultdevicetype , scan )  // TestScan on the default execution space: one test_range call, then two sized runs
+{
+  TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1 , 1000 );
+  TestScan< Kokkos::DefaultExecutionSpace >( 1000000 );
+  TestScan< Kokkos::DefaultExecutionSpace >( 10000000 );
+  Kokkos::DefaultExecutionSpace::fence();  // explicit fence on the execution space before the test returns
+}
+
+
+//----------------------------------------------------------------------------
+
+TEST_F( defaultdevicetype , compiler_macros )  // passes when TestCompilerMacros::Test returns true for the default execution space
+{
+  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::DefaultExecutionSpace >() ) );
+}
+
+
+//----------------------------------------------------------------------------
+TEST_F( defaultdevicetype , cxx11 )  // runs TestCXX11::Test with arguments 1 through 4; each call must return true
+{
+  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(1) ) );
+  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(2) ) );
+  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(3) ) );
+  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(4) ) );
+}
+
+TEST_F( defaultdevicetype , team_vector )  // runs TestTeamVector::Test with arguments 0 through 5; each call must return true
+{
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(0) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(1) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(2) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(3) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(4) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(5) ) );
+}
+
+TEST_F( defaultdevicetype , malloc )  // exercises kokkos_malloc / kokkos_realloc / kokkos_free, including a zero-byte request
+{
+  int* data = static_cast<int*>( Kokkos::kokkos_malloc(100*sizeof(int)) );
+  ASSERT_NO_THROW(data = static_cast<int*>( Kokkos::kokkos_realloc(data,120*sizeof(int)) ));  // growing from 100 to 120 ints must not throw
+  Kokkos::kokkos_free(data);
+
+  int* data2 = static_cast<int*>( Kokkos::kokkos_malloc(0) );  // zero-byte request
+  ASSERT_TRUE(data2==nullptr);  // the test expects a null pointer back
+  Kokkos::kokkos_free(data2);  // freeing that pointer is exercised as well
+}
+
+} // namespace Test
+
+#endif
diff --git a/lib/kokkos/core/unit_test/TestMemoryPool.hpp b/lib/kokkos/core/unit_test/TestMemoryPool.hpp
index cf650b0bc8baa1949643a57ffff808c83f406286..f83f390ac63b57269270c7317f76392c31d2165b 100644
--- a/lib/kokkos/core/unit_test/TestMemoryPool.hpp
+++ b/lib/kokkos/core/unit_test/TestMemoryPool.hpp
@@ -55,21 +55,28 @@
 //#define TESTMEMORYPOOL_PRINT
 //#define TESTMEMORYPOOL_PRINT_STATUS
 
+#define STRIDE 1
 #ifdef KOKKOS_HAVE_CUDA
-#define STRIDE 32
+#define STRIDE_ALLOC 32
 #else
-#define STRIDE 1
+#define STRIDE_ALLOC 1
 #endif
 
 namespace TestMemoryPool {
 
 struct pointer_obj {
   uint64_t *  ptr;
+
+  KOKKOS_INLINE_FUNCTION
+  pointer_obj() : ptr( 0 ) {}
 };
 
 struct pointer_obj2 {
   void *  ptr;
   size_t  size;
+
+  KOKKOS_INLINE_FUNCTION
+  pointer_obj2() : ptr( 0 ), size( 0 ) {}
 };
 
 template < typename PointerView, typename Allocator >
@@ -86,14 +93,14 @@ struct allocate_memory {
     : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m )
   {
     // Initialize the view with the out degree of each vertex.
-    Kokkos::parallel_for( num_ptrs * STRIDE, *this );
+    Kokkos::parallel_for( num_ptrs * STRIDE_ALLOC, *this );
   }
 
   KOKKOS_INLINE_FUNCTION
   void operator()( size_type i ) const
   {
-    if ( i % STRIDE == 0 ) {
-      m_pointers[i / STRIDE].ptr =
+    if ( i % STRIDE_ALLOC == 0 ) {
+      m_pointers[i / STRIDE_ALLOC].ptr =
         static_cast< uint64_t * >( m_mempool.allocate( m_chunk_size ) );
     }
   }
@@ -231,14 +238,14 @@ struct allocate_deallocate_memory {
       m_mempool( m )
   {
     // Initialize the view with the out degree of each vertex.
-    Kokkos::parallel_for( work_size * STRIDE, *this );
+    Kokkos::parallel_for( work_size * STRIDE_ALLOC, *this );
   }
 
   KOKKOS_INLINE_FUNCTION
   void operator()( size_type i ) const
   {
-    if ( i % STRIDE == 0 ) {
-      unsigned my_work = m_work[i / STRIDE];
+    if ( i % STRIDE_ALLOC == 0 ) {
+      unsigned my_work = m_work[i / STRIDE_ALLOC];
 
       if ( ( my_work & 1 ) == 0 ) {
         // Allocation.
@@ -303,7 +310,7 @@ bool test_mempool( size_t chunk_size, size_t total_size )
   typedef Kokkos::View< pointer_obj *, device_type >       pointer_view;
   typedef Kokkos::Experimental::MemoryPool< device_type >  pool_memory_space;
 
-  uint64_t result;
+  uint64_t result = 0;
   size_t num_chunks = total_size / chunk_size;
   bool return_val = true;
 
@@ -805,16 +812,9 @@ void test_memory_exhaustion()
 
 }
 
-#ifdef TESTMEMORYPOOL_PRINT
 #undef TESTMEMORYPOOL_PRINT
-#endif
-
-#ifdef TESTMEMORYPOOL_PRINT_STATUS
 #undef TESTMEMORYPOOL_PRINT_STATUS
-#endif
-
-#ifdef STRIDE
 #undef STRIDE
-#endif
+#undef STRIDE_ALLOC
 
 #endif
diff --git a/lib/kokkos/core/unit_test/TestOpenMP_c.cpp b/lib/kokkos/core/unit_test/TestOpenMP_c.cpp
deleted file mode 100644
index f0cdabe913b8a4125fc5a1541823328d749759bf..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/unit_test/TestOpenMP_c.cpp
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#include <gtest/gtest.h>
-
-#include <Kokkos_Macros.hpp>
-#ifdef KOKKOS_LAMBDA
-#undef KOKKOS_LAMBDA
-#endif
-#define KOKKOS_LAMBDA [=]
-
-#include <Kokkos_Core.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestViewImpl.hpp>
-#include <TestAtomic.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestMemoryPool.hpp>
-#include <TestTaskPolicy.hpp>
-
-
-#include <TestCXX11.hpp>
-#include <TestCXX11Deduction.hpp>
-#include <TestTeamVector.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-
-#include <TestPolicyConstruction.hpp>
-
-
-namespace Test {
-
-class openmp : public ::testing::Test {
-protected:
-  static void SetUpTestCase();
-  static void TearDownTestCase();
-};
-
-TEST_F( openmp , view_remap )
-{
-  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
-
-  typedef Kokkos::View< double*[N1][N2][N3] ,
-                             Kokkos::LayoutRight ,
-                             Kokkos::OpenMP > output_type ;
-
-  typedef Kokkos::View< int**[N2][N3] ,
-                             Kokkos::LayoutLeft ,
-                             Kokkos::OpenMP > input_type ;
-
-  typedef Kokkos::View< int*[N0][N2][N3] ,
-                             Kokkos::LayoutLeft ,
-                             Kokkos::OpenMP > diff_type ;
-
-  output_type output( "output" , N0 );
-  input_type  input ( "input" , N0 , N1 );
-  diff_type   diff  ( "diff" , N0 );
-
-  int value = 0 ;
-  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
-  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
-  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
-  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
-    input(i0,i1,i2,i3) = ++value ;
-  }}}}
-
-  // Kokkos::deep_copy( diff , input ); // throw with incompatible shape
-  Kokkos::deep_copy( output , input );
-
-  value = 0 ;
-  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
-  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
-  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
-  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
-    ++value ;
-    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
-  }}}}
-}
-
-//----------------------------------------------------------------------------
-
-
-TEST_F( openmp , view_aggregate )
-{
-  TestViewAggregate< Kokkos::OpenMP >();
-  TestViewAggregateReduction< Kokkos::OpenMP >();
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( openmp , scan )
-{
-  TestScan< Kokkos::OpenMP >::test_range( 1 , 1000 );
-  TestScan< Kokkos::OpenMP >( 1000000 );
-  TestScan< Kokkos::OpenMP >( 10000000 );
-  Kokkos::OpenMP::fence();
-}
-
-
-TEST_F( openmp , team_scan )
-{
-  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10 );
-  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
-  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10000 );
-  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( openmp , compiler_macros )
-{
-  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::OpenMP >() ) );
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( openmp , memory_space )
-{
-  TestMemorySpace< Kokkos::OpenMP >();
-}
-
-TEST_F( openmp , memory_pool )
-{
-  bool val = TestMemoryPool::test_mempool< Kokkos::OpenMP >( 128, 128000000 );
-  ASSERT_TRUE( val );
-
-  TestMemoryPool::test_mempool2< Kokkos::OpenMP >( 64, 4, 1000000, 2000000 );
-
-  TestMemoryPool::test_memory_exhaustion< Kokkos::OpenMP >();
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( openmp , template_meta_functions )
-{
-  TestTemplateMetaFunctions<int, Kokkos::OpenMP >();
-}
-
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
-TEST_F( openmp , cxx11 )
-{
-  if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::OpenMP >::value ) {
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(1) ) );
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(2) ) );
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(3) ) );
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(4) ) );
-  }
-}
-#endif
-
-TEST_F( openmp , reduction_deduction )
-{
-  TestCXX11::test_reduction_deduction< Kokkos::OpenMP >();
-}
-
-TEST_F( openmp , team_vector )
-{
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(0) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(1) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(2) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(3) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(4) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(5) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(6) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(7) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(8) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(9) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(10) ) );
-}
-
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
-
-TEST_F( openmp , task_fib )
-{
-  for ( int i = 0 ; i < 25 ; ++i ) {
-    TestTaskPolicy::TestFib< Kokkos::OpenMP >::run(i, (i+1)*1000000 );
-  }
-}
-
-TEST_F( openmp , task_depend )
-{
-  for ( int i = 0 ; i < 25 ; ++i ) {
-    TestTaskPolicy::TestTaskDependence< Kokkos::OpenMP >::run(i);
-  }
-}
-
-TEST_F( openmp , task_team )
-{
-  TestTaskPolicy::TestTaskTeam< Kokkos::OpenMP >::run(1000);
-  //TestTaskPolicy::TestTaskTeamValue< Kokkos::OpenMP >::run(1000); //TODO put back after testing
-}
-
-
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
-
-
-} // namespace test
-
-
-
-
-
-
diff --git a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp
index 049138eb07cd402140f1d509a3590eb8e3eb6104..1bb45481c9b76d6dde29ff9e9d192d5ae4531829 100644
--- a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp
+++ b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp
@@ -58,6 +58,10 @@ public:
   }
 private:
   void test_compile_time_parameters() {
+    {
+      Kokkos::Impl::expand_variadic();
+      Kokkos::Impl::expand_variadic(1,2,3);
+    }
     {
       typedef Kokkos::RangePolicy<> policy_t;
       typedef typename policy_t::execution_space execution_space;
diff --git a/lib/kokkos/core/unit_test/TestQthread.cpp b/lib/kokkos/core/unit_test/TestQthread.cpp
index 431b844c9f4e60030f546fba320088f5eecf89c5..a465f39ca8ab428b72b68c103ec3989c92fb670f 100644
--- a/lib/kokkos/core/unit_test/TestQthread.cpp
+++ b/lib/kokkos/core/unit_test/TestQthread.cpp
@@ -46,11 +46,8 @@
 #include <Kokkos_Core.hpp>
 #include <Kokkos_Qthread.hpp>
 
-#include <Qthread/Kokkos_Qthread_TaskPolicy.hpp>
-
 //----------------------------------------------------------------------------
 
-#include <TestViewImpl.hpp>
 #include <TestAtomic.hpp>
 
 #include <TestViewAPI.hpp>
@@ -62,7 +59,7 @@
 #include <TestScan.hpp>
 #include <TestAggregate.hpp>
 #include <TestCompilerMacros.hpp>
-#include <TestTaskPolicy.hpp>
+#include <TestTaskScheduler.hpp>
 // #include <TestTeamVector.hpp>
 
 namespace Test {
@@ -274,14 +271,14 @@ TEST_F( qthread , team_vector )
 
 TEST_F( qthread , task_policy )
 {
-  TestTaskPolicy::test_task_dep< Kokkos::Qthread >( 10 );
-  for ( long i = 0 ; i < 25 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Qthread >(i);
-  for ( long i = 0 ; i < 35 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Qthread >(i);
+  TestTaskScheduler::test_task_dep< Kokkos::Qthread >( 10 );
+  for ( long i = 0 ; i < 25 ; ++i ) TestTaskScheduler::test_fib< Kokkos::Qthread >(i);
+  for ( long i = 0 ; i < 35 ; ++i ) TestTaskScheduler::test_fib2< Kokkos::Qthread >(i);
 }
 
 TEST_F( qthread , task_team )
 {
-  TestTaskPolicy::test_task_team< Kokkos::Qthread >(1000);
+  TestTaskScheduler::test_task_team< Kokkos::Qthread >(1000);
 }
 
 //----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/unit_test/TestRange.hpp b/lib/kokkos/core/unit_test/TestRange.hpp
index be8b4f90a32d96ad12ff4bf3baafd4ab8dec11ca..e342e844c7665650732a38e49063abee626a4a8c 100644
--- a/lib/kokkos/core/unit_test/TestRange.hpp
+++ b/lib/kokkos/core/unit_test/TestRange.hpp
@@ -185,7 +185,7 @@ struct TestRange {
       },error);
       ASSERT_EQ(error,0);
 
-      if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<const size_t>(4*ExecSpace::concurrency())) ) {
+      if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<size_t>(4*ExecSpace::concurrency())) ) {
         size_t min = N;
         size_t max = 0;
         for(int t=0; t<ExecSpace::concurrency(); t++) {
@@ -219,7 +219,7 @@ struct TestRange {
       },error);
       ASSERT_EQ(error,0);
 
-      if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<const size_t>(4*ExecSpace::concurrency())) ) {
+      if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<size_t>(4*ExecSpace::concurrency())) ) {
         size_t min = N;
         size_t max = 0;
         for(int t=0; t<ExecSpace::concurrency(); t++) {
diff --git a/lib/kokkos/core/unit_test/TestReduce.hpp b/lib/kokkos/core/unit_test/TestReduce.hpp
index 53fc393bcc29e6133e4d71ffab87815b935ec9f9..a15fab17a62f2a12b8e4284a8f07aa7a2e4f1faa 100644
--- a/lib/kokkos/core/unit_test/TestReduce.hpp
+++ b/lib/kokkos/core/unit_test/TestReduce.hpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
@@ -373,8 +373,16 @@ public:
 
     for ( unsigned i = 0 ; i < Repeat ; ++i ) {
       for ( unsigned j = 0 ; j < Count ; ++j ) {
-        const unsigned long correct = j % 2 ? 1 : nwork ;
-        ASSERT_EQ( (ScalarType) correct , result[i][j] );
+        if ( nwork == 0 )
+        {
+          ScalarType amin( std::numeric_limits<ScalarType>::min() );
+          ScalarType amax( std::numeric_limits<ScalarType>::max() );
+          const ScalarType correct = (j%2) ? amax : amin;
+          ASSERT_EQ( (ScalarType) correct , result[i][j] );
+        } else {
+          const unsigned long correct = j % 2 ? 1 : nwork ;
+          ASSERT_EQ( (ScalarType) correct , result[i][j] );
+        }
       }
     }
   }
@@ -473,13 +481,13 @@ public:
 
   //------------------------------------
 
-  TestTripleNestedReduce( const size_type & nrows , const size_type & ncols 
+  TestTripleNestedReduce( const size_type & nrows , const size_type & ncols
                         , const size_type & team_size , const size_type & vector_length )
   {
     run_test( nrows , ncols , team_size, vector_length );
   }
 
-  void run_test( const size_type & nrows , const size_type & ncols 
+  void run_test( const size_type & nrows , const size_type & ncols
                , const size_type & team_size, const size_type & vector_length )
   {
     //typedef Kokkos::LayoutLeft Layout;
@@ -510,7 +518,7 @@ public:
       } );
     } );
 
-    // Three level parallelism kernel to force caching of vector x 
+    // Three level parallelism kernel to force caching of vector x
     ScalarType result = 0.0;
     int chunk_size = 128;
     Kokkos::parallel_reduce( team_policy( nrows/chunk_size , team_size , vector_length ) , KOKKOS_LAMBDA ( const member_type& teamMember , double &update ) {
@@ -541,7 +549,7 @@ public:
   typedef DeviceType execution_space ;
   typedef typename execution_space::size_type size_type ;
 
-  TestTripleNestedReduce( const size_type & , const size_type  
+  TestTripleNestedReduce( const size_type & , const size_type
                         , const size_type & , const size_type )
   { }
 };
@@ -1059,16 +1067,19 @@ struct TestReduceCombinatoricalInstantiation {
   }
 
 
-  static void AddLabel() {
-    std::string s("Std::String");
+  static void execute_a() {
     AddPolicy();
-    AddPolicy("Char Constant");
+  }
+
+  static void execute_b() {
+    std::string s("Std::String");
     AddPolicy(s.c_str());
-    AddPolicy(s);
+    AddPolicy("Char Constant");
   }
 
-  static void execute() {
-    AddLabel();
+  static void execute_c() {
+    std::string s("Std::String");
+    AddPolicy(s);
   }
 };
 
@@ -1420,6 +1431,9 @@ struct TestReducers {
       if(h_values(i)<reference_min) {
         reference_min = h_values(i);
         reference_loc = i;
+      } else if (h_values(i) == reference_min) {
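+        // try to make min unique. NOTE(review): epsilon() is 0 for integral Scalar and is lost to rounding for floating-point values above ~2, so ties can remain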
+        h_values(i) += std::numeric_limits<Scalar>::epsilon();
       }
     }
     Kokkos::deep_copy(values,h_values);
@@ -1484,6 +1498,9 @@ struct TestReducers {
       if(h_values(i)>reference_max) {
         reference_max = h_values(i);
         reference_loc = i;
+      } else if (h_values(i) == reference_max) {
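+        // try to make max unique. NOTE(review): epsilon() is 0 for integral Scalar and is lost to rounding for floating-point values above ~2, so ties can remain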
+        h_values(i) -= std::numeric_limits<Scalar>::epsilon();
       }
     }
     Kokkos::deep_copy(values,h_values);
@@ -1547,13 +1564,23 @@ struct TestReducers {
      int reference_maxloc = -1;
      for(int i=0; i<N; i++) {
        h_values(i) = (Scalar)(rand()%100000);
+     }
+     for(int i=0; i<N; i++) {
        if(h_values(i)>reference_max) {
          reference_max = h_values(i);
          reference_maxloc = i;
+       } else if (h_values(i) == reference_max) {
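+         // try to make max unique. NOTE(review): epsilon() is 0 for integral Scalar and is lost to rounding for floating-point values above ~2, so ties can remain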
+         h_values(i) -= std::numeric_limits<Scalar>::epsilon();
        }
+     }
+     for(int i=0; i<N; i++) {
        if(h_values(i)<reference_min) {
          reference_min = h_values(i);
          reference_minloc = i;
+       } else if (h_values(i) == reference_min) {
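+         // try to make min unique. NOTE(review): epsilon() is 0 for integral Scalar and is lost to rounding for floating-point values above ~2, so ties can remain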
+         h_values(i) += std::numeric_limits<Scalar>::epsilon();
        }
      }
      Kokkos::deep_copy(values,h_values);
@@ -1570,8 +1597,16 @@ struct TestReducers {
        Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_scalar(minmax_scalar);
        Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
        ASSERT_EQ(minmax_scalar.min_val,reference_min);
+       for(int i=0; i<N; i++) {
+         if((i == minmax_scalar.min_loc) && (h_values(i)==reference_min))
+           reference_minloc = i;
+       }
        ASSERT_EQ(minmax_scalar.min_loc,reference_minloc);
        ASSERT_EQ(minmax_scalar.max_val,reference_max);
+       for(int i=0; i<N; i++) {
+         if((i == minmax_scalar.max_loc) && (h_values(i)==reference_max))
+           reference_maxloc = i;
+       }
        ASSERT_EQ(minmax_scalar.max_loc,reference_maxloc);
        value_type minmax_scalar_view = reducer_scalar.result_view()();
        ASSERT_EQ(minmax_scalar_view.min_val,reference_min);
diff --git a/lib/kokkos/core/unit_test/TestSerial.cpp b/lib/kokkos/core/unit_test/TestSerial.cpp
deleted file mode 100644
index d85614e66e67af2ccae9979d7f3869cbf5165c1d..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/unit_test/TestSerial.cpp
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-//
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-//
-// ************************************************************************
-//@HEADER
-*/
-#include <gtest/gtest.h>
-
-#include <Kokkos_Macros.hpp>
-#ifdef KOKKOS_LAMBDA
-#undef KOKKOS_LAMBDA
-#endif
-#define KOKKOS_LAMBDA [=]
-
-#include <Kokkos_Core.hpp>
-
-#include <impl/Kokkos_ViewTileLeft.hpp>
-#include <TestTile.hpp>
-
-#include <impl/Kokkos_Serial_TaskPolicy.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestViewImpl.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewOfClass.hpp>
-#include <TestViewSubview.hpp>
-#include <TestAtomic.hpp>
-#include <TestAtomicOperations.hpp>
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestTaskPolicy.hpp>
-#include <TestMemoryPool.hpp>
-
-
-#include <TestCXX11.hpp>
-#include <TestCXX11Deduction.hpp>
-#include <TestTeamVector.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-
-#include <TestPolicyConstruction.hpp>
-
-#include <TestMDRange.hpp>
-
-namespace Test {
-
-class serial : public ::testing::Test {
-protected:
-  static void SetUpTestCase()
-    {
-      Kokkos::HostSpace::execution_space::initialize();
-    }
-  static void TearDownTestCase()
-    {
-      Kokkos::HostSpace::execution_space::finalize();
-    }
-};
-
-TEST_F( serial , md_range ) {
-  TestMDRange_2D< Kokkos::Serial >::test_for2(100,100);
-
-  TestMDRange_3D< Kokkos::Serial >::test_for3(100,100,100);
-}
-
-TEST_F( serial , impl_shared_alloc ) {
-  test_shared_alloc< Kokkos::HostSpace , Kokkos::Serial >();
-}
-
-TEST_F( serial, policy_construction) {
-  TestRangePolicyConstruction< Kokkos::Serial >();
-  TestTeamPolicyConstruction< Kokkos::Serial >();
-}
-
-TEST_F( serial , impl_view_mapping ) {
-  test_view_mapping< Kokkos::Serial >();
-  test_view_mapping_subview< Kokkos::Serial >();
-  test_view_mapping_operator< Kokkos::Serial >();
-  TestViewMappingAtomic< Kokkos::Serial >::run();
-}
-
-TEST_F( serial, view_impl) {
-  test_view_impl< Kokkos::Serial >();
-}
-
-TEST_F( serial, view_api) {
-  TestViewAPI< double , Kokkos::Serial >();
-}
-
-TEST_F( serial , view_nested_view )
-{
-  ::Test::view_nested_view< Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_auto_1d_left ) {
-  TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_auto_1d_right ) {
-  TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_auto_1d_stride ) {
-  TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_assign_strided ) {
-  TestViewSubview::test_1d_strided_assignment< Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_left_0 ) {
-  TestViewSubview::test_left_0< Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_left_1 ) {
-  TestViewSubview::test_left_1< Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_left_2 ) {
-  TestViewSubview::test_left_2< Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_left_3 ) {
-  TestViewSubview::test_left_3< Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_right_0 ) {
-  TestViewSubview::test_right_0< Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_right_1 ) {
-  TestViewSubview::test_right_1< Kokkos::Serial >();
-}
-
-TEST_F( serial, view_subview_right_3 ) {
-  TestViewSubview::test_right_3< Kokkos::Serial >();
-}
-
-TEST_F( serial , range_tag )
-{
-  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
-  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
-  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000);
-  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001);
-  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001);
-  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001);
-  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000);
-}
-
-TEST_F( serial , team_tag )
-{
-  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
-  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
-  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000);
-  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000);
-}
-
-TEST_F( serial, long_reduce) {
-  TestReduce< long ,   Kokkos::Serial >( 1000000 );
-}
-
-TEST_F( serial, double_reduce) {
-  TestReduce< double ,   Kokkos::Serial >( 1000000 );
-}
-
-TEST_F( serial , reducers )
-{
-  TestReducers<int, Kokkos::Serial>::execute_integer();
-  TestReducers<size_t, Kokkos::Serial>::execute_integer();
-  TestReducers<double, Kokkos::Serial>::execute_float();
-  TestReducers<Kokkos::complex<double>, Kokkos::Serial>::execute_basic();
-}
-
-TEST_F( serial, long_reduce_dynamic ) {
-  TestReduceDynamic< long ,   Kokkos::Serial >( 1000000 );
-}
-
-TEST_F( serial, double_reduce_dynamic ) {
-  TestReduceDynamic< double ,   Kokkos::Serial >( 1000000 );
-}
-
-TEST_F( serial, long_reduce_dynamic_view ) {
-  TestReduceDynamicView< long ,   Kokkos::Serial >( 1000000 );
-}
-
-TEST_F( serial , scan )
-{
-  TestScan< Kokkos::Serial >::test_range( 1 , 1000 );
-  TestScan< Kokkos::Serial >( 10 );
-  TestScan< Kokkos::Serial >( 10000 );
-}
-
-TEST_F( serial , team_long_reduce) {
-  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 );
-  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
-  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 );
-  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
-}
-
-TEST_F( serial , team_double_reduce) {
-  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 );
-  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
-  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 );
-  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
-}
-
-TEST_F( serial , team_shared_request) {
-  TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >();
-  TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >();
-}
-
-#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
-TEST_F( serial , team_lambda_shared_request) {
-  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >();
-  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >();
-}
-#endif
-
-TEST_F( serial, shmem_size) {
-  TestShmemSize< Kokkos::Serial >();
-}
-
-TEST_F( serial  , team_scan )
-{
-  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10 );
-  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
-  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10000 );
-  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
-}
-
-
-TEST_F( serial , view_remap )
-{
-  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
-
-  typedef Kokkos::View< double*[N1][N2][N3] ,
-                             Kokkos::LayoutRight ,
-                             Kokkos::Serial > output_type ;
-
-  typedef Kokkos::View< int**[N2][N3] ,
-                             Kokkos::LayoutLeft ,
-                             Kokkos::Serial > input_type ;
-
-  typedef Kokkos::View< int*[N0][N2][N3] ,
-                             Kokkos::LayoutLeft ,
-                             Kokkos::Serial > diff_type ;
-
-  output_type output( "output" , N0 );
-  input_type  input ( "input" , N0 , N1 );
-  diff_type   diff  ( "diff" , N0 );
-
-  int value = 0 ;
-  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
-  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
-  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
-  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
-    input(i0,i1,i2,i3) = ++value ;
-  }}}}
-
-  // Kokkos::deep_copy( diff , input ); // throw with incompatible shape
-  Kokkos::deep_copy( output , input );
-
-  value = 0 ;
-  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
-  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
-  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
-  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
-    ++value ;
-    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
-  }}}}
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( serial , view_aggregate )
-{
-  TestViewAggregate< Kokkos::Serial >();
-  TestViewAggregateReduction< Kokkos::Serial >();
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( serial , atomics )
-{
-  const int loop_count = 1e6 ;
-
-  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,3) ) );
-}
-
-TEST_F( serial , atomic_operations )
-{
-  const int start = 1; //Avoid zero for division
-  const int end = 11;
-  for (int i = start; i < end; ++i)
-  {
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 4 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 4 ) ) );
-  }
-
-}
-//----------------------------------------------------------------------------
-
-TEST_F( serial, tile_layout )
-{
-  TestTile::test< Kokkos::Serial , 1 , 1 >( 1 , 1 );
-  TestTile::test< Kokkos::Serial , 1 , 1 >( 2 , 3 );
-  TestTile::test< Kokkos::Serial , 1 , 1 >( 9 , 10 );
-
-  TestTile::test< Kokkos::Serial , 2 , 2 >( 1 , 1 );
-  TestTile::test< Kokkos::Serial , 2 , 2 >( 2 , 3 );
-  TestTile::test< Kokkos::Serial , 2 , 2 >( 4 , 4 );
-  TestTile::test< Kokkos::Serial , 2 , 2 >( 9 , 9 );
-
-  TestTile::test< Kokkos::Serial , 2 , 4 >( 9 , 9 );
-  TestTile::test< Kokkos::Serial , 4 , 2 >( 9 , 9 );
-
-  TestTile::test< Kokkos::Serial , 4 , 4 >( 1 , 1 );
-  TestTile::test< Kokkos::Serial , 4 , 4 >( 4 , 4 );
-  TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 9 );
-  TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 11 );
-
-  TestTile::test< Kokkos::Serial , 8 , 8 >( 1 , 1 );
-  TestTile::test< Kokkos::Serial , 8 , 8 >( 4 , 4 );
-  TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 9 );
-  TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 11 );
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( serial , compiler_macros )
-{
-  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Serial >() ) );
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( serial , memory_space )
-{
-  TestMemorySpace< Kokkos::Serial >();
-}
-
-TEST_F( serial , memory_pool )
-{
-  bool val = TestMemoryPool::test_mempool< Kokkos::Serial >( 128, 128000000 );
-  ASSERT_TRUE( val );
-
-  TestMemoryPool::test_mempool2< Kokkos::Serial >( 64, 4, 1000000, 2000000 );
-
-  TestMemoryPool::test_memory_exhaustion< Kokkos::Serial >();
-}
-
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
-
-TEST_F( serial , task_fib )
-{
-  for ( int i = 0 ; i < 25 ; ++i ) {
-    TestTaskPolicy::TestFib< Kokkos::Serial >::run(i);
-  }
-}
-
-TEST_F( serial , task_depend )
-{
-  for ( int i = 0 ; i < 25 ; ++i ) {
-    TestTaskPolicy::TestTaskDependence< Kokkos::Serial >::run(i);
-  }
-}
-
-TEST_F( serial , task_team )
-{
-  TestTaskPolicy::TestTaskTeam< Kokkos::Serial >::run(1000);
-  //TestTaskPolicy::TestTaskTeamValue< Kokkos::Serial >::run(1000); //put back after testing
-}
-
-TEST_F( serial , old_task_policy )
-{
-  TestTaskPolicy::test_task_dep< Kokkos::Serial >( 10 );
-  // TestTaskPolicy::test_norm2< Kokkos::Serial >( 1000 );
-  // for ( long i = 0 ; i < 30 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Serial >(i);
-  // for ( long i = 0 ; i < 40 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Serial >(i);
-  for ( long i = 0 ; i < 20 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Serial >(i);
-  for ( long i = 0 ; i < 25 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Serial >(i);
-}
-
-TEST_F( serial , old_task_team )
-{
-  TestTaskPolicy::test_task_team< Kokkos::Serial >(1000);
-}
-
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
-
-//----------------------------------------------------------------------------
-
-TEST_F( serial , template_meta_functions )
-{
-  TestTemplateMetaFunctions<int, Kokkos::Serial >();
-}
-
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
-TEST_F( serial , cxx11 )
-{
-  if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Serial >::value ) {
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(1) ) );
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(2) ) );
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(3) ) );
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(4) ) );
-  }
-}
-#endif
-
-TEST_F( serial , reduction_deduction )
-{
-  TestCXX11::test_reduction_deduction< Kokkos::Serial >();
-}
-
-TEST_F( serial , team_vector )
-{
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(0) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(1) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(2) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(3) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(4) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(5) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(6) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(7) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(8) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(9) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(10) ) );
-}
-
-} // namespace test
-
diff --git a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp
index 61166888142e7f666b303dc1c837daa34c07a00c..291f9f60e4b8050e11b653f3f3ae975f1d1e8c91 100644
--- a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp
+++ b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp
@@ -62,7 +62,7 @@ struct SharedAllocDestroy {
 
   void destroy_shared_allocation()
     {
-      Kokkos::atomic_fetch_add( count , 1 );
+      Kokkos::atomic_increment( count );
     }
 
 };
@@ -72,11 +72,11 @@ void test_shared_alloc()
 {
 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
 
-  typedef const Kokkos::Experimental::Impl::SharedAllocationHeader   Header ;
-  typedef Kokkos::Experimental::Impl::SharedAllocationTracker  Tracker ;
-  typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void >                       RecordBase ;
-  typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void >                RecordMemS ;
-  typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , SharedAllocDestroy >  RecordFull ;
+  typedef const Kokkos::Impl::SharedAllocationHeader   Header ;
+  typedef Kokkos::Impl::SharedAllocationTracker  Tracker ;
+  typedef Kokkos::Impl::SharedAllocationRecord< void , void >                       RecordBase ;
+  typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace , void >                RecordMemS ;
+  typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace , SharedAllocDestroy >  RecordFull ;
 
   static_assert( sizeof(Tracker) == sizeof(int*), "SharedAllocationTracker has wrong size!" );
 
diff --git a/lib/kokkos/core/unit_test/TestSynchronic.cpp b/lib/kokkos/core/unit_test/TestSynchronic.cpp
index 9121dc15a17ecead1895ce1df660c1d25a2deda2..f6a3f38e3f9f3c5cd40145777364cf0e2bc3cf57 100644
--- a/lib/kokkos/core/unit_test/TestSynchronic.cpp
+++ b/lib/kokkos/core/unit_test/TestSynchronic.cpp
@@ -29,7 +29,7 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
 //#undef _WIN32_WINNT
 //#define _WIN32_WINNT 0x0602
 
-#if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__APPLE__)
+#if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__APPLE__) || defined(__ARM_ARCH_8A)
 
 // Skip for now
 
diff --git a/lib/kokkos/core/unit_test/TestSynchronic.hpp b/lib/kokkos/core/unit_test/TestSynchronic.hpp
index d820129e8b571fa5eac2dc7f8d5016c47cd589f4..f4341b97815b8d70956dfb85cf0d41a4f07bab4d 100644
--- a/lib/kokkos/core/unit_test/TestSynchronic.hpp
+++ b/lib/kokkos/core/unit_test/TestSynchronic.hpp
@@ -31,6 +31,7 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <impl/Kokkos_Synchronic.hpp>
 #include <mutex>
+#include <cmath>
 
 namespace Test {
 
diff --git a/lib/kokkos/core/unit_test/TestTaskPolicy.hpp b/lib/kokkos/core/unit_test/TestTaskPolicy.hpp
deleted file mode 100644
index 71790f6def82d50a12d37d88e0b0e7d17f28799f..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/unit_test/TestTaskPolicy.hpp
+++ /dev/null
@@ -1,1145 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-
-#ifndef KOKKOS_UNITTEST_TASKPOLICY_HPP
-#define KOKKOS_UNITTEST_TASKPOLICY_HPP
-
-#include <stdio.h>
-#include <iostream>
-#include <cmath>
-#include <Kokkos_TaskPolicy.hpp>
-
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace TestTaskPolicy {
-
-namespace {
-
-long eval_fib( long n )
-{
-  constexpr long mask = 0x03 ;
-
-  long fib[4] = { 0 , 1 , 1 , 2 };
-
-  for ( long i = 2 ; i <= n ; ++i ) {
-    fib[ i & mask ] = fib[ ( i - 1 ) & mask ] + fib[ ( i - 2 ) & mask ];
-  }
-  
-  return fib[ n & mask ];
-}
-
-}
-
-template< typename Space >
-struct TestFib
-{
-  typedef Kokkos::TaskPolicy<Space>  policy_type ;
-  typedef Kokkos::Future<long,Space> future_type ;
-  typedef long value_type ;
-
-  policy_type policy ;
-  future_type fib_m1 ;
-  future_type fib_m2 ;
-  const value_type n ;
-
-  KOKKOS_INLINE_FUNCTION
-  TestFib( const policy_type & arg_policy , const value_type arg_n )
-    : policy(arg_policy)
-    , fib_m1() , fib_m2()
-    , n( arg_n )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( typename policy_type::member_type & , value_type & result )
-    {
-#if 0
-      printf( "\nTestFib(%ld) %d %d\n"
-             , n
-             , int( ! fib_m1.is_null() )
-             , int( ! fib_m2.is_null() )
-             );
-#endif
-
-      if ( n < 2 ) {
-        result = n ;
-      }
-      else if ( ! fib_m2.is_null() && ! fib_m1.is_null() ) {
-        result = fib_m1.get() + fib_m2.get();
-      }
-      else {
-
-        // Spawn new children and respawn myself to sum their results:
-        // Spawn lower value at higher priority as it has a shorter
-        // path to completion.
-
-        fib_m2 = policy.task_spawn( TestFib(policy,n-2)
-                                  , Kokkos::TaskSingle
-                                  , Kokkos::TaskHighPriority );
-
-        fib_m1 = policy.task_spawn( TestFib(policy,n-1)
-                                  , Kokkos::TaskSingle );
-
-        Kokkos::Future<Space> dep[] = { fib_m1 , fib_m2 };
-
-        Kokkos::Future<Space> fib_all = policy.when_all( 2 , dep );
-
-        if ( ! fib_m2.is_null() && ! fib_m1.is_null() && ! fib_all.is_null() ) {
-          // High priority to retire this branch
-          policy.respawn( this , Kokkos::TaskHighPriority , fib_all );
-        }
-        else {
-#if 0
-      printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n"
-             , n
-             , policy.allocation_capacity()
-             , policy.allocated_task_count_max()
-             , policy.allocated_task_count_accum()
-             );
-#endif
-          Kokkos::abort("TestFib insufficient memory");
-
-        }
-      }
-    }
-
-  static void run( int i , size_t MemoryCapacity = 16000 )
-    {
-      typedef typename policy_type::memory_space memory_space ;
-
-      enum { Log2_SuperBlockSize = 12 };
-
-      policy_type root_policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize );
-
-      future_type f = root_policy.host_spawn( TestFib(root_policy,i) , Kokkos::TaskSingle );
-      Kokkos::wait( root_policy );
-      ASSERT_EQ( eval_fib(i) , f.get() );
-
-#if 0
-      fprintf( stdout , "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n"
-             , i
-             , int(root_policy.template spawn_allocation_size<TestFib>())
-             , int(root_policy.when_all_allocation_size(2))
-             , root_policy.allocation_capacity()
-             , root_policy.allocated_task_count_max()
-             , root_policy.allocated_task_count_accum()
-             );
-      fflush( stdout );
-#endif
-    }
-
-};
-
-} // namespace TestTaskPolicy
-
-//----------------------------------------------------------------------------
-
-namespace TestTaskPolicy {
-
-template< class Space >
-struct TestTaskDependence {
-
-  typedef Kokkos::TaskPolicy<Space>  policy_type ;
-  typedef Kokkos::Future<Space>      future_type ;
-  typedef Kokkos::View<long,Space>   accum_type ;
-  typedef void value_type ;
-
-  policy_type  m_policy ;
-  accum_type   m_accum ;
-  long         m_count ;
-
-  KOKKOS_INLINE_FUNCTION
-  TestTaskDependence( long n
-                    , const policy_type & arg_policy
-                    , const accum_type  & arg_accum )
-    : m_policy( arg_policy )
-    , m_accum( arg_accum )
-    , m_count( n )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( typename policy_type::member_type & )
-    {
-       enum { CHUNK = 8 };
-       const int n = CHUNK < m_count ? CHUNK : m_count ;
-
-       if ( 1 < m_count ) {
-         future_type f[ CHUNK ] ;
-
-         const int inc = ( m_count + n - 1 ) / n ;
-
-         for ( int i = 0 ; i < n ; ++i ) {
-           long begin = i * inc ;
-           long count = begin + inc < m_count ? inc : m_count - begin ;
-           f[i] = m_policy.task_spawn( TestTaskDependence(count,m_policy,m_accum) , Kokkos::TaskSingle );
-         }
-
-         m_count = 0 ;
-
-         m_policy.respawn( this , m_policy.when_all( n , f ) );
-       }
-       else if ( 1 == m_count ) {
-         Kokkos::atomic_increment( & m_accum() );
-       }
-    }
-
-  static void run( int n )
-    {
-      typedef typename policy_type::memory_space memory_space ;
-
-      // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool
-      enum { MemoryCapacity = 16000 };
-      enum { Log2_SuperBlockSize = 12 };
-      policy_type policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize );
-
-      accum_type accum("accum");
-
-      typename accum_type::HostMirror host_accum =
-        Kokkos::create_mirror_view( accum );
-
-      policy.host_spawn( TestTaskDependence(n,policy,accum) , Kokkos::TaskSingle );
-
-      Kokkos::wait( policy );
-
-      Kokkos::deep_copy( host_accum , accum );
-
-      ASSERT_EQ( host_accum() , n );
-    }
-};
-
-} // namespace TestTaskPolicy
-
-//----------------------------------------------------------------------------
-
-namespace TestTaskPolicy {
-
-template< class ExecSpace >
-struct TestTaskTeam {
-
-  //enum { SPAN = 8 };
-  enum { SPAN = 33 };
-  //enum { SPAN = 1 };
-
-  typedef void value_type ;
-  typedef Kokkos::TaskPolicy<ExecSpace>  policy_type ;
-  typedef Kokkos::Future<ExecSpace>      future_type ;
-  typedef Kokkos::View<long*,ExecSpace>  view_type ;
-
-  policy_type  policy ;
-  future_type  future ;
-
-  view_type  parfor_result ;
-  view_type  parreduce_check ;
-  view_type  parscan_result ;
-  view_type  parscan_check ;
-  const long nvalue ;
-
-  KOKKOS_INLINE_FUNCTION
-  TestTaskTeam( const policy_type & arg_policy
-              , const view_type   & arg_parfor_result
-              , const view_type   & arg_parreduce_check
-              , const view_type   & arg_parscan_result
-              , const view_type   & arg_parscan_check
-              , const long          arg_nvalue )
-    : policy(arg_policy)
-    , future()
-    , parfor_result( arg_parfor_result )
-    , parreduce_check( arg_parreduce_check )
-    , parscan_result( arg_parscan_result )
-    , parscan_check( arg_parscan_check )
-    , nvalue( arg_nvalue )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( typename policy_type::member_type & member )
-    {
-      const long end   = nvalue + 1 ;
-      const long begin = 0 < end - SPAN ? end - SPAN : 0 ;
-
-      if ( 0 < begin && future.is_null() ) {
-        if ( member.team_rank() == 0 ) {
-          future = policy.task_spawn
-            ( TestTaskTeam( policy ,
-                            parfor_result ,
-                            parreduce_check,
-                            parscan_result,
-                            parscan_check,
-                            begin - 1 )
-            , Kokkos::TaskTeam );
-
-          assert( ! future.is_null() );
-
-          policy.respawn( this , future );
-        }
-        return ;
-      }
-
-      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i ) { parfor_result[i] = i ; }
-                          );
-
-      // test parallel_reduce without join
-    
-      long tot = 0;
-      long expected = (begin+end-1)*(end-begin)*0.5;
-      
-      Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i, long &res) { res += parfor_result[i]; }
-                          , tot);
-      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i ) { parreduce_check[i] = expected-tot ; }
-                          );
-
-      // test parallel_reduce with join
-
-      tot = 0;
-      Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i, long &res) { res += parfor_result[i]; }
-                          , [&]( long& val1, const long& val2) { val1 += val2; }
-                          , tot);
-      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i ) { parreduce_check[i] += expected-tot ; }
-                          );
-
-#if 0
-      // test parallel_scan
-
-      // Exclusive scan
-      Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i, long &val , const bool final ) {
-                              if ( final ) { parscan_result[i] = val; }
-                              val += i;
-                            }
-                          );
-
-      if ( member.team_rank() == 0 ) {
-        for ( long i = begin ; i < end ; ++i ) {
-          parscan_check[i] = (i*(i-1)-begin*(begin-1))*0.5-parscan_result[i];
-        }
-      }
-
-      // Inclusive scan
-      Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i, long &val , const bool final ) {
-                              val += i;
-                              if ( final ) { parscan_result[i] = val; }
-                            }
-                          );
-
-      if ( member.team_rank() == 0 ) {
-        for ( long i = begin ; i < end ; ++i ) {
-          parscan_check[i] += (i*(i+1)-begin*(begin-1))*0.5-parscan_result[i];
-        }
-      }
-#endif
-
-    }
-
-  static void run( long n )
-    {
-      // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop
-      // const unsigned memory_capacity = 100000 ; // fails with SPAN=1 for serial and OMP
-      const unsigned memory_capacity = 400000 ;
-
-      policy_type root_policy( typename policy_type::memory_space()
-                        , memory_capacity );
-
-      view_type   root_parfor_result("parfor_result",n+1);
-      view_type   root_parreduce_check("parreduce_check",n+1);
-      view_type   root_parscan_result("parscan_result",n+1);
-      view_type   root_parscan_check("parscan_check",n+1);
-
-      typename view_type::HostMirror
-        host_parfor_result = Kokkos::create_mirror_view( root_parfor_result );
-      typename view_type::HostMirror
-        host_parreduce_check = Kokkos::create_mirror_view( root_parreduce_check );
-      typename view_type::HostMirror
-        host_parscan_result = Kokkos::create_mirror_view( root_parscan_result );
-      typename view_type::HostMirror
-        host_parscan_check = Kokkos::create_mirror_view( root_parscan_check );
-
-      future_type f = root_policy.host_spawn(
-                        TestTaskTeam( root_policy ,
-                                      root_parfor_result ,
-                                      root_parreduce_check ,
-                                      root_parscan_result,
-                                      root_parscan_check,
-                                      n ) ,
-                        Kokkos::TaskTeam );
-
-      Kokkos::wait( root_policy );
-
-      Kokkos::deep_copy( host_parfor_result , root_parfor_result );
-      Kokkos::deep_copy( host_parreduce_check , root_parreduce_check );
-      Kokkos::deep_copy( host_parscan_result , root_parscan_result );
-      Kokkos::deep_copy( host_parscan_check , root_parscan_check );
-
-      for ( long i = 0 ; i <= n ; ++i ) {
-        const long answer = i ;
-        if ( host_parfor_result(i) != answer ) {
-          std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = "
-                    << host_parfor_result(i) << " != " << answer << std::endl ;
-        }
-        if ( host_parreduce_check(i) != 0 ) {
-          std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = "
-                    << host_parreduce_check(i) << " != 0" << std::endl ;
-        } //TODO
-        if ( host_parscan_check(i) != 0 ) {
-          std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = "
-                    << host_parscan_check(i) << " != 0" << std::endl ;
-        }
-      }
-    }
-};
-
-template< class ExecSpace >
-struct TestTaskTeamValue {
-
-  enum { SPAN = 8 };
-
-  typedef long value_type ;
-  typedef Kokkos::TaskPolicy<ExecSpace>         policy_type ;
-  typedef Kokkos::Future<value_type,ExecSpace>  future_type ;
-  typedef Kokkos::View<long*,ExecSpace>         view_type ;
-
-  policy_type  policy ;
-  future_type  future ;
-
-  view_type  result ;
-  const long nvalue ;
-
-  KOKKOS_INLINE_FUNCTION
-  TestTaskTeamValue( const policy_type & arg_policy
-                   , const view_type   & arg_result
-                   , const long          arg_nvalue )
-    : policy(arg_policy)
-    , future()
-    , result( arg_result )
-    , nvalue( arg_nvalue )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( typename policy_type::member_type const & member
-                 , value_type & final )
-    {
-      const long end   = nvalue + 1 ;
-      const long begin = 0 < end - SPAN ? end - SPAN : 0 ;
-
-      if ( 0 < begin && future.is_null() ) {
-        if ( member.team_rank() == 0 ) {
-
-          future = policy.task_spawn
-            ( TestTaskTeamValue( policy , result , begin - 1 )
-            , Kokkos::TaskTeam );
-
-          assert( ! future.is_null() );
-
-          policy.respawn( this , future );
-        }
-        return ;
-      }
-
-      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i ) { result[i] = i + 1 ; }
-                          );
-
-      if ( member.team_rank() == 0 ) {
-        final = result[nvalue] ;
-      }
-
-      Kokkos::memory_fence();
-    }
-
-  static void run( long n )
-    {
-      // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop
-      const unsigned memory_capacity = 100000 ;
-
-      policy_type root_policy( typename policy_type::memory_space()
-                             , memory_capacity );
-
-      view_type   root_result("result",n+1);
-
-      typename view_type::HostMirror
-        host_result = Kokkos::create_mirror_view( root_result );
-
-      future_type fv = root_policy.host_spawn
-        ( TestTaskTeamValue( root_policy, root_result, n ) , Kokkos::TaskTeam );
-
-      Kokkos::wait( root_policy );
-
-      Kokkos::deep_copy( host_result , root_result );
-
-      if ( fv.get() != n + 1 ) {
-        std::cerr << "TestTaskTeamValue ERROR future = "
-                  << fv.get() << " != " << n + 1 << std::endl ;
-      }
-      for ( long i = 0 ; i <= n ; ++i ) {
-        const long answer = i + 1 ;
-        if ( host_result(i) != answer ) {
-          std::cerr << "TestTaskTeamValue ERROR result(" << i << ") = "
-                    << host_result(i) << " != " << answer << std::endl ;
-        }
-      }
-    }
-};
-} // namespace TestTaskPolicy
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace TestTaskPolicy {
-
-template< class ExecSpace >
-struct FibChild {
-
-  typedef long value_type ;
-
-  Kokkos::Experimental::TaskPolicy<ExecSpace> policy ;
-  Kokkos::Experimental::Future<long,ExecSpace> fib_1 ;
-  Kokkos::Experimental::Future<long,ExecSpace> fib_2 ;
-  const value_type n ;
-  int has_nested ;
-
-  KOKKOS_INLINE_FUNCTION
-  FibChild( const Kokkos::Experimental::TaskPolicy<ExecSpace> & arg_policy
-          , const value_type arg_n )
-    : policy(arg_policy)
-    , fib_1() , fib_2()
-    , n( arg_n ), has_nested(0) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void apply( value_type & result )
-    {
-      typedef Kokkos::Experimental::Future<long,ExecSpace> future_type ;
-
-      if ( n < 2 ) {
-
-        has_nested = -1 ;
-
-        result = n ;
-      }
-      else {
-        if ( has_nested == 0 ) {
-          // Spawn new children and respawn myself to sum their results:
-          // Spawn lower value at higher priority as it has a shorter
-          // path to completion.
-          if ( fib_2.is_null() ) {
-            fib_2 = policy.task_create( FibChild(policy,n-2) );
-          }
-
-          if ( ! fib_2.is_null() && fib_1.is_null() ) {
-            fib_1 = policy.task_create( FibChild(policy,n-1) );
-          }
-
-          if ( ! fib_1.is_null() ) {
-            has_nested = 2 ;
-
-            policy.spawn( fib_2 , true /* high priority */ );
-            policy.spawn( fib_1 );
-            policy.add_dependence( this , fib_1 );
-            policy.add_dependence( this , fib_2 );
-            policy.respawn( this );
-          }
-          else {
-            // Release task memory before spawning the task,
-            // after spawning memory cannot be released.
-            fib_2 = future_type();
-            // Respawn when more memory is available
-            policy.respawn_needing_memory( this );
-          }
-        }
-        else if ( has_nested == 2 ) {
-
-          has_nested = -1 ;
-
-          result = fib_1.get() + fib_2.get();
-
-if ( false ) {
-  printf("FibChild %ld = fib(%ld), task_count(%d)\n"
-        , long(n), long(result), policy.allocated_task_count());
-}
-
-        }
-        else {
-          printf("FibChild(%ld) execution error\n",(long)n);
-          Kokkos::abort("FibChild execution error");
-        }
-      }
-    }
-};
-
-template< class ExecSpace >
-struct FibChild2 {
-
-  typedef long value_type ;
-
-  Kokkos::Experimental::TaskPolicy<ExecSpace> policy ;
-  Kokkos::Experimental::Future<long,ExecSpace> fib_a ;
-  Kokkos::Experimental::Future<long,ExecSpace> fib_b ;
-  const value_type n ;
-  int has_nested ;
-
-  KOKKOS_INLINE_FUNCTION
-  FibChild2( const Kokkos::Experimental::TaskPolicy<ExecSpace> & arg_policy
-           , const value_type arg_n )
-    : policy(arg_policy)
-    , n( arg_n ), has_nested(0) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void apply( value_type & result )
-    {
-      if ( 0 == has_nested ) {
-        if ( n < 2 ) {
-
-          has_nested = -1 ;
-
-          result = n ;
-        }
-        else if ( n < 4 ) {
-          // Spawn new children and respawn myself to sum their results:
-          // result = Fib(n-1) + Fib(n-2)
-          has_nested = 2 ;
-
-          // Spawn lower value at higher priority as it has a shorter
-          // path to completion.
-
-          policy.clear_dependence( this );
-          fib_a = policy.spawn( policy.task_create( FibChild2(policy,n-1) ) );
-          fib_b = policy.spawn( policy.task_create( FibChild2(policy,n-2) ) , true );
-          policy.add_dependence( this , fib_a );
-          policy.add_dependence( this , fib_b );
-          policy.respawn( this );
-        }
-        else {
-          // Spawn new children and respawn myself to sum their results:
-          // result = Fib(n-1) + Fib(n-2)
-          // result = ( Fib(n-2) + Fib(n-3) ) + ( Fib(n-3) + Fib(n-4) )
-          // result = ( ( Fib(n-3) + Fib(n-4) ) + Fib(n-3) ) + ( Fib(n-3) + Fib(n-4) )
-          // result = 3 * Fib(n-3) + 2 * Fib(n-4)
-          has_nested = 4 ;
-
-          // Spawn lower value at higher priority as it has a shorter
-          // path to completion.
-
-          policy.clear_dependence( this );
-          fib_a = policy.spawn( policy.task_create( FibChild2(policy,n-3) ) );
-          fib_b = policy.spawn( policy.task_create( FibChild2(policy,n-4) ) , true );
-          policy.add_dependence( this , fib_a );
-          policy.add_dependence( this , fib_b );
-          policy.respawn( this );
-        }
-     }
-     else if ( 2 == has_nested || 4 == has_nested ) {
-        result = ( has_nested == 2 ) ? fib_a.get() + fib_b.get()
-                                     : 3 * fib_a.get() + 2 * fib_b.get() ;
-
-        has_nested = -1 ;
-      }
-      else {
-        printf("FibChild2(%ld) execution error\n",(long)n);
-        Kokkos::abort("FibChild2 execution error");
-      }
-    }
-};
-
-template< class ExecSpace >
-void test_fib( long n , const unsigned task_max_count = 4096 )
-{
-  const unsigned task_max_size   = 256 ;
-  const unsigned task_dependence = 4 ;
-
-  Kokkos::Experimental::TaskPolicy<ExecSpace>
-    policy( task_max_count
-          , task_max_size
-          , task_dependence );
-
-  Kokkos::Experimental::Future<long,ExecSpace> f =
-    policy.spawn( policy.proc_create( FibChild<ExecSpace>(policy,n) ) );
-
-  Kokkos::Experimental::wait( policy );
-
-  if ( f.get() != eval_fib(n) ) {
-    std::cout << "Fib(" << n << ") = " << f.get();
-    std::cout << " != " << eval_fib(n);
-    std::cout << std::endl ;
-  }
-}
-
-template< class ExecSpace >
-void test_fib2( long n , const unsigned task_max_count = 1024 )
-{
-  const unsigned task_max_size   = 256 ;
-  const unsigned task_dependence = 4 ;
-
-  Kokkos::Experimental::TaskPolicy<ExecSpace>
-    policy( task_max_count
-          , task_max_size
-          , task_dependence );
-
-  Kokkos::Experimental::Future<long,ExecSpace> f =
-    policy.spawn( policy.proc_create( FibChild2<ExecSpace>(policy,n) ) );
-
-  Kokkos::Experimental::wait( policy );
-
-  if ( f.get() != eval_fib(n) ) {
-    std::cout << "Fib2(" << n << ") = " << f.get();
-    std::cout << " != " << eval_fib(n);
-    std::cout << std::endl ;
-  }
-}
-
-//----------------------------------------------------------------------------
-
-template< class ExecSpace >
-struct Norm2 {
-
-  typedef double value_type ;
-
-  const double * const m_x ;
-
-  Norm2( const double * x ) : m_x(x) {}
-
-  inline
-  void init( double & val ) const { val = 0 ; }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( int i , double & val ) const { val += m_x[i] * m_x[i] ; }
-
-  void apply( double & dst ) const { dst = std::sqrt( dst ); }
-};
-
-template< class ExecSpace >
-void test_norm2( const int n )
-{
-  const unsigned task_max_count  = 1024 ;
-  const unsigned task_max_size   = 256 ;
-  const unsigned task_dependence = 4 ;
-
-  Kokkos::Experimental::TaskPolicy<ExecSpace>
-    policy( task_max_count
-          , task_max_size
-          , task_dependence );
-
-  double * const x = new double[n];
-
-  for ( int i = 0 ; i < n ; ++i ) x[i] = 1 ;
-
-  Kokkos::RangePolicy<ExecSpace> r(0,n);
-
-  Kokkos::Experimental::Future<double,ExecSpace> f =
-    Kokkos::Experimental::spawn_reduce( policy , r , Norm2<ExecSpace>(x) );
-
-  Kokkos::Experimental::wait( policy );
-
-#if defined(PRINT)
-  std::cout << "Norm2: " << f.get() << std::endl ;
-#endif
-
-  delete[] x ;
-}
-
-//----------------------------------------------------------------------------
-
-template< class Space >
-struct TaskDep {
-
-  typedef int value_type ;
-  typedef Kokkos::Experimental::TaskPolicy< Space > policy_type ;
-
-  const policy_type policy ;
-  const int         input ;
-
-  TaskDep( const policy_type & arg_p , const int arg_i )
-    : policy( arg_p ), input( arg_i ) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void apply( int & val )
-  {
-    val = input ;
-    const int num = policy.get_dependence( this );
-
-    for ( int i = 0 ; i < num ; ++i ) {
-      Kokkos::Experimental::Future<int,Space> f = policy.get_dependence( this , i );
-      val += f.get();
-    }
-  }
-};
-
-
-template< class Space >
-void test_task_dep( const int n )
-{
-  enum { NTEST = 64 };
-
-  const unsigned task_max_count  = 1024 ;
-  const unsigned task_max_size   = 64 ;
-  const unsigned task_dependence = 4 ;
-
-  Kokkos::Experimental::TaskPolicy<Space>
-    policy( task_max_count
-          , task_max_size
-          , task_dependence );
-
-  Kokkos::Experimental::Future<int,Space> f[ NTEST ];
-
-  for ( int i = 0 ; i < NTEST ; ++i ) {
-    // Create task in the "constructing" state with capacity for 'n+1' dependences
-    f[i] = policy.proc_create( TaskDep<Space>(policy,0) , n + 1 );
-
-    if ( f[i].get_task_state() != Kokkos::Experimental::TASK_STATE_CONSTRUCTING ) {
-      Kokkos::Impl::throw_runtime_exception("get_task_state() != Kokkos::Experimental::TASK_STATE_CONSTRUCTING");
-    }
-
-    // Only use 'n' dependences
-
-    for ( int j = 0 ; j < n ; ++j ) {
-
-      Kokkos::Experimental::Future<int,Space> nested =
-        policy.proc_create( TaskDep<Space>(policy,j+1) );
-
-      policy.spawn( nested );
-
-      // Add dependence to a "constructing" task
-      policy.add_dependence( f[i] , nested );
-    }
-
-    // Spawn task from the "constructing" to the "waiting" state
-    policy.spawn( f[i] );
-  }
-
-  const int answer = n % 2 ? n * ( ( n + 1 ) / 2 ) : ( n / 2 ) * ( n + 1 );
-
-  Kokkos::Experimental::wait( policy );
-
-  int error = 0 ;
-  for ( int i = 0 ; i < NTEST ; ++i ) {
-    if ( f[i].get_task_state() != Kokkos::Experimental::TASK_STATE_COMPLETE ) {
-      Kokkos::Impl::throw_runtime_exception("get_task_state() != Kokkos::Experimental::TASK_STATE_COMPLETE");
-    }
-    if ( answer != f[i].get() && 0 == error ) {
-      std::cout << "test_task_dep(" << n << ") ERROR at[" << i << "]"
-                << " answer(" << answer << ") != result(" << f[i].get() << ")" << std::endl ;
-    }
-  }
-}
-
-//----------------------------------------------------------------------------
-
-template< class ExecSpace >
-struct TaskTeam {
-
-  enum { SPAN = 8 };
-
-  typedef void value_type ;
-  typedef Kokkos::Experimental::TaskPolicy<ExecSpace>  policy_type ;
-  typedef Kokkos::Experimental::Future<void,ExecSpace> future_type ;
-  typedef Kokkos::View<long*,ExecSpace>                view_type ;
-
-  policy_type  policy ;
-  future_type  future ;
-
-  view_type  result ;
-  const long nvalue ;
-
-  KOKKOS_INLINE_FUNCTION
-  TaskTeam( const policy_type & arg_policy
-          , const view_type   & arg_result
-          , const long          arg_nvalue )
-    : policy(arg_policy)
-    , future()
-    , result( arg_result )
-    , nvalue( arg_nvalue )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  void apply( const typename policy_type::member_type & member )
-    {
-      const long end   = nvalue + 1 ;
-      const long begin = 0 < end - SPAN ? end - SPAN : 0 ;
-
-      if ( 0 < begin && future.get_task_state() == Kokkos::Experimental::TASK_STATE_NULL ) {
-        if ( member.team_rank() == 0 ) {
-          future = policy.spawn( policy.task_create_team( TaskTeam( policy , result , begin - 1 ) ) );
-          policy.clear_dependence( this );
-          policy.add_dependence( this , future );
-          policy.respawn( this );
-        }
-        return ;
-      }
-
-      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i ) { result[i] = i + 1 ; }
-                          );
-    }
-};
-
-template< class ExecSpace >
-struct TaskTeamValue {
-
-  enum { SPAN = 8 };
-
-  typedef long value_type ;
-  typedef Kokkos::Experimental::TaskPolicy<ExecSpace>         policy_type ;
-  typedef Kokkos::Experimental::Future<value_type,ExecSpace>  future_type ;
-  typedef Kokkos::View<long*,ExecSpace>                       view_type ;
-
-  policy_type  policy ;
-  future_type  future ;
-
-  view_type  result ;
-  const long nvalue ;
-
-  KOKKOS_INLINE_FUNCTION
-  TaskTeamValue( const policy_type & arg_policy
-               , const view_type   & arg_result
-               , const long          arg_nvalue )
-    : policy(arg_policy)
-    , future()
-    , result( arg_result )
-    , nvalue( arg_nvalue )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  void apply( const typename policy_type::member_type & member , value_type & final )
-    {
-      const long end   = nvalue + 1 ;
-      const long begin = 0 < end - SPAN ? end - SPAN : 0 ;
-
-      if ( 0 < begin && future.is_null() ) {
-        if ( member.team_rank() == 0 ) {
-
-          future = policy.task_create_team( TaskTeamValue( policy , result , begin - 1 ) );
-
-          policy.spawn( future );
-          policy.add_dependence( this , future );
-          policy.respawn( this );
-        }
-        return ;
-      }
-
-      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
-                          , [&]( int i ) { result[i] = i + 1 ; }
-                          );
-
-      if ( member.team_rank() == 0 ) {
-        final = result[nvalue] ;
-      }
-
-      Kokkos::memory_fence();
-    }
-};
-
-template< class ExecSpace >
-void test_task_team( long n )
-{
-  typedef TaskTeam< ExecSpace >            task_type ;
-  typedef TaskTeamValue< ExecSpace >       task_value_type ;
-  typedef typename task_type::view_type    view_type ;
-  typedef typename task_type::policy_type  policy_type ;
-
-  typedef typename task_type::future_type        future_type ;
-  typedef typename task_value_type::future_type  future_value_type ;
-
-  const unsigned task_max_count  = 1024 ;
-  const unsigned task_max_size   = 256 ;
-  const unsigned task_dependence = 4 ;
-
-  policy_type
-    policy( task_max_count
-          , task_max_size
-          , task_dependence );
-
-  view_type    result("result",n+1);
-
-  typename view_type::HostMirror
-    host_result = Kokkos::create_mirror_view( result );
-
-  future_type f = policy.proc_create_team( task_type( policy , result , n ) );
-
-  ASSERT_FALSE( f.is_null() );
-
-  policy.spawn( f );
-
-  Kokkos::Experimental::wait( policy );
-
-  Kokkos::deep_copy( host_result , result );
-
-  for ( long i = 0 ; i <= n ; ++i ) {
-    const long answer = i + 1 ;
-    if ( host_result(i) != answer ) {
-      std::cerr << "test_task_team void ERROR result(" << i << ") = "
-                << host_result(i) << " != " << answer << std::endl ;
-    }
-  }
-
-  future_value_type fv = policy.proc_create_team( task_value_type( policy , result , n ) );
-
-  ASSERT_FALSE( fv.is_null() );
-
-  policy.spawn( fv );
-
-  Kokkos::Experimental::wait( policy );
-
-  Kokkos::deep_copy( host_result , result );
-
-  if ( fv.get() != n + 1 ) {
-    std::cerr << "test_task_team value ERROR future = "
-              << fv.get() << " != " << n + 1 << std::endl ;
-  }
-  for ( long i = 0 ; i <= n ; ++i ) {
-    const long answer = i + 1 ;
-    if ( host_result(i) != answer ) {
-      std::cerr << "test_task_team value ERROR result(" << i << ") = "
-                << host_result(i) << " != " << answer << std::endl ;
-    }
-  }
-}
-
-//----------------------------------------------------------------------------
-
-template< class ExecSpace >
-struct TaskLatchAdd {
-
-  typedef void value_type ;
-  typedef Kokkos::Experimental::Future< Kokkos::Experimental::Latch , ExecSpace >  future_type ;
-
-  future_type     latch ;
-  volatile int *  count ;
-
-  KOKKOS_INLINE_FUNCTION
-  TaskLatchAdd( const future_type & arg_latch 
-              , volatile int * const arg_count )
-    : latch( arg_latch )
-    , count( arg_count )
-    {}
-
-  KOKKOS_INLINE_FUNCTION
-  void apply()
-    {
-      Kokkos::atomic_fetch_add( count , 1 );
-      latch.add(1);
-    }
-};
-
-template< class ExecSpace >
-struct TaskLatchRun {
-
-  typedef void value_type ;
-  typedef Kokkos::Experimental::TaskPolicy< ExecSpace >      policy_type ;
-  typedef Kokkos::Experimental::Future< Kokkos::Experimental::Latch , ExecSpace >  future_type ;
-
-  policy_type policy ;
-  int total ;
-  volatile int count ;
-
-  KOKKOS_INLINE_FUNCTION
-  TaskLatchRun( const policy_type & arg_policy , const int arg_total )
-    : policy(arg_policy), total(arg_total), count(0) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void apply()
-    {
-      if ( 0 == count && 0 < total ) {
-        future_type latch = policy.create_latch( total );
-
-        for ( int i = 0 ; i < total ; ++i ) {
-          auto f = policy.task_create( TaskLatchAdd<ExecSpace>(latch,&count) , 0 );
-          if ( f.is_null() ) {
-            Kokkos::abort("TaskLatchAdd allocation FAILED" );
-          }
-
-          if ( policy.spawn( f ).is_null() ) {
-            Kokkos::abort("TaskLatcAdd spawning FAILED" );
-          }
-        }
-
-        policy.add_dependence( this , latch );
-        policy.respawn( this );
-      }
-      else if ( count != total ) {
-        printf("TaskLatchRun FAILED %d != %d\n",count,total);
-      }
-    }
-};
-
-
-template< class ExecSpace >
-void test_latch( int n )
-{
-  typedef TaskLatchRun< ExecSpace >        task_type ;
-  typedef typename task_type::policy_type  policy_type ;
-
-  // Primary + latch + n * LatchAdd
-  //
-  // This test uses several two different block sizes for allocation from the
-  // memory pool, so the memory size requested must be big enough to cause two
-  // or more superblocks to be used.  Currently, the superblock size in the
-  // task policy is 2^16, so make the minimum requested memory size greater
-  // than this.
-  const unsigned task_max_count  = n + 2 < 256 ? 256 : n + 2;
-  const unsigned task_max_size   = 256;
-  const unsigned task_dependence = 4 ;
-
-  policy_type
-    policy( task_max_count
-          , task_max_size
-          , task_dependence );
-
-  policy.spawn( policy.proc_create( TaskLatchRun<ExecSpace>(policy,n) ) );
-
-  wait( policy );
-}
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-} // namespace TestTaskPolicy
-
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
-#endif /* #ifndef KOKKOS_UNITTEST_TASKPOLICY_HPP */
-
-
diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1134553980f8a63351f85a86b33537a35d52644c
--- /dev/null
+++ b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp
@@ -0,0 +1,551 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+
+#ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP
+#define KOKKOS_UNITTEST_TASKSCHEDULER_HPP
+
+#include <stdio.h>
+#include <iostream>
+#include <cmath>
+
+#if defined( KOKKOS_ENABLE_TASKDAG )
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace TestTaskScheduler {
+
+namespace {
+
+// Host-side reference value: n-th Fibonacci number computed over a 4-slot ring buffer.
+inline long eval_fib( long n )
+{
+  constexpr long ring_mask = 0x03 ;
+  long ring[ 4 ] = { 0 , 1 , 1 , 2 };
+  long k = 1 ;
+  while ( ++k <= n ) {
+    const long prev1 = ring[ ( k - 1 ) & ring_mask ];
+    const long prev2 = ring[ ( k - 2 ) & ring_mask ];
+    ring[ k & ring_mask ] = prev1 + prev2 ;
+  }
+  return ring[ n & ring_mask ];
+}
+
+}
+// Task functor computing Fibonacci(n) by recursively spawning child tasks on a TaskScheduler.
+template< typename Space >
+struct TestFib
+{
+  typedef Kokkos::TaskScheduler<Space>  policy_type ;
+  typedef Kokkos::Future<long,Space> future_type ;
+  typedef long value_type ;
+  // Scheduler handle, futures of the (n-1) and (n-2) child tasks, and the Fibonacci index.
+  policy_type policy ;
+  future_type fib_m1 ;
+  future_type fib_m2 ;
+  const value_type n ;
+  // Stores the scheduler and the index; both child futures are default-constructed.
+  KOKKOS_INLINE_FUNCTION
+  TestFib( const policy_type & arg_policy , const value_type arg_n )
+    : policy(arg_policy)
+    , fib_m1() , fib_m2()
+    , n( arg_n )
+    {}
+  // Task body: writes 'result' (n itself, or the sum of both children), else spawns children and respawns.
+  KOKKOS_INLINE_FUNCTION
+  void operator()( typename policy_type::member_type & , value_type & result )
+    {
+#if 0
+      printf( "\nTestFib(%ld) %d %d\n"
+             , n
+             , int( ! fib_m1.is_null() )
+             , int( ! fib_m2.is_null() )
+             );
+#endif
+      // For n < 2 the result is n itself; this covers Fibonacci(0) and Fibonacci(1).
+      if ( n < 2 ) {
+        result = n ;
+      }
+      else if ( ! fib_m2.is_null() && ! fib_m1.is_null() ) {
+        result = fib_m1.get() + fib_m2.get();
+      }
+      else {
+
+        // Spawn new children and respawn myself to sum their results:
+        // Spawn lower value at higher priority as it has a shorter
+        // path to completion.
+
+        fib_m2 = policy.task_spawn( TestFib(policy,n-2)
+                                  , Kokkos::TaskSingle
+                                  , Kokkos::TaskHighPriority );
+
+        fib_m1 = policy.task_spawn( TestFib(policy,n-1)
+                                  , Kokkos::TaskSingle );
+
+        Kokkos::Future<Space> dep[] = { fib_m1 , fib_m2 };
+        // Single future aggregating both children; it is the dependence handed to respawn() below.
+        Kokkos::Future<Space> fib_all = policy.when_all( 2 , dep );
+
+        if ( ! fib_m2.is_null() && ! fib_m1.is_null() && ! fib_all.is_null() ) {
+          // High priority to retire this branch
+          policy.respawn( this , Kokkos::TaskHighPriority , fib_all );
+        }
+        else {
+#if 1
+      printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n"
+             , n
+             , policy.allocation_capacity()
+             , policy.allocated_task_count_max()
+             , policy.allocated_task_count_accum()
+             );
+#endif
+          Kokkos::abort("TestFib insufficient memory");
+
+        }
+      }
+    }
+  // Host driver: computes Fibonacci(i) on a freshly built scheduler and compares it with eval_fib(i).
+  static void run( int i , size_t MemoryCapacity = 16000 )
+    {
+      typedef typename policy_type::memory_space memory_space ;
+
+      enum { Log2_SuperBlockSize = 12 };
+
+      policy_type root_policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize );
+
+      future_type f = root_policy.host_spawn( TestFib(root_policy,i) , Kokkos::TaskSingle );
+      Kokkos::wait( root_policy );
+      ASSERT_EQ( eval_fib(i) , f.get() );
+
+#if 0
+      fprintf( stdout , "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n"
+             , i
+             , int(root_policy.template spawn_allocation_size<TestFib>())
+             , int(root_policy.when_all_allocation_size(2))
+             , root_policy.allocation_capacity()
+             , root_policy.allocated_task_count_max()
+             , root_policy.allocated_task_count_accum()
+             );
+      fflush( stdout );
+#endif
+    }
+
+};
+
+} // namespace TestTaskScheduler
+
+//----------------------------------------------------------------------------
+
+namespace TestTaskScheduler {
+// Task functor that splits m_count unit increments across up to CHUNK children; leaves bump m_accum.
+template< class Space >
+struct TestTaskDependence {
+
+  typedef Kokkos::TaskScheduler<Space>  policy_type ;
+  typedef Kokkos::Future<Space>      future_type ;
+  typedef Kokkos::View<long,Space>   accum_type ;
+  typedef void value_type ;
+  // Scheduler handle, shared scalar accumulator, and the number of increments this task owns.
+  policy_type  m_policy ;
+  accum_type   m_accum ;
+  long         m_count ;
+
+  KOKKOS_INLINE_FUNCTION
+  TestTaskDependence( long n
+                    , const policy_type & arg_policy
+                    , const accum_type  & arg_accum )
+    : m_policy( arg_policy )
+    , m_accum( arg_accum )
+    , m_count( n )
+    {}
+  // Task body: m_count > 1 fans out to children and respawns; m_count == 1 increments the accumulator.
+  KOKKOS_INLINE_FUNCTION
+  void operator()( typename policy_type::member_type & )
+    {
+       enum { CHUNK = 8 };
+       const int n = CHUNK < m_count ? CHUNK : m_count ;
+
+       if ( 1 < m_count ) {
+         future_type f[ CHUNK ] ;
+
+         const int inc = ( m_count + n - 1 ) / n ;
+         // When n*inc overshoots m_count the trailing children get a count <= 0 and do nothing.
+         for ( int i = 0 ; i < n ; ++i ) {
+           long begin = i * inc ;
+           long count = begin + inc < m_count ? inc : m_count - begin ;
+           f[i] = m_policy.task_spawn( TestTaskDependence(count,m_policy,m_accum) , Kokkos::TaskSingle );
+         }
+
+         m_count = 0 ;
+         // With m_count == 0 the respawned invocation matches neither branch and simply returns.
+         m_policy.respawn( this , m_policy.when_all( n , f ) );
+       }
+       else if ( 1 == m_count ) {
+         Kokkos::atomic_increment( & m_accum() );
+       }
+    }
+  // Host driver: spawns a root task owning n increments and asserts the accumulator ends up equal to n.
+  static void run( int n )
+    {
+      typedef typename policy_type::memory_space memory_space ;
+
+      // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool
+      enum { MemoryCapacity = 16000 };
+      enum { Log2_SuperBlockSize = 12 };
+      policy_type policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize );
+      // NOTE(review): the final check presumably relies on the View starting at zero - confirm.
+      accum_type accum("accum");
+
+      typename accum_type::HostMirror host_accum =
+        Kokkos::create_mirror_view( accum );
+
+      policy.host_spawn( TestTaskDependence(n,policy,accum) , Kokkos::TaskSingle );
+
+      Kokkos::wait( policy );
+
+      Kokkos::deep_copy( host_accum , accum );
+
+      ASSERT_EQ( host_accum() , n );
+    }
+};
+
+} // namespace TestTaskScheduler
+
+//----------------------------------------------------------------------------
+
+namespace TestTaskScheduler {
+// Team task functor exercising parallel_for, parallel_reduce and parallel_scan over TeamThreadRange.
+template< class ExecSpace >
+struct TestTaskTeam {
+  // SPAN is the maximum number of indices one task handles (see begin/end in operator()).
+  //enum { SPAN = 8 };
+  enum { SPAN = 33 };
+  //enum { SPAN = 1 };
+
+  typedef void value_type ;
+  typedef Kokkos::TaskScheduler<ExecSpace>  policy_type ;
+  typedef Kokkos::Future<ExecSpace>      future_type ;
+  typedef Kokkos::View<long*,ExecSpace>  view_type ;
+
+  policy_type  policy ;
+  future_type  future ;
+
+  view_type  parfor_result ;
+  view_type  parreduce_check ;
+  view_type  parscan_result ;
+  view_type  parscan_check ;
+  const long nvalue ;
+
+  KOKKOS_INLINE_FUNCTION
+  TestTaskTeam( const policy_type & arg_policy
+              , const view_type   & arg_parfor_result
+              , const view_type   & arg_parreduce_check
+              , const view_type   & arg_parscan_result
+              , const view_type   & arg_parscan_check
+              , const long          arg_nvalue )
+    : policy(arg_policy)
+    , future()
+    , parfor_result( arg_parfor_result )
+    , parreduce_check( arg_parreduce_check )
+    , parscan_result( arg_parscan_result )
+    , parscan_check( arg_parscan_check )
+    , nvalue( arg_nvalue )
+    {}
+  // Task body: fills and checks indices [begin,end), where end = nvalue+1 and begin = max(end-SPAN,0).
+  KOKKOS_INLINE_FUNCTION
+  void operator()( typename policy_type::member_type & member )
+    {
+      const long end   = nvalue + 1 ;
+      const long begin = 0 < end - SPAN ? end - SPAN : 0 ;
+      // Indices below 'begin' remain and no child exists yet: rank 0 spawns one for nvalue = begin-1.
+      if ( 0 < begin && future.is_null() ) {
+        if ( member.team_rank() == 0 ) {
+          future = policy.task_spawn
+            ( TestTaskTeam( policy ,
+                            parfor_result ,
+                            parreduce_check,
+                            parscan_result,
+                            parscan_check,
+                            begin - 1 )
+            , Kokkos::TaskTeam );
+
+          assert( ! future.is_null() );
+
+          policy.respawn( this , future );
+        }
+        return ;
+      }
+
+      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
+                          , [&]( int i ) { parfor_result[i] = i ; }
+                          );
+
+      // test parallel_reduce without join
+      // 'expected' is the closed form of begin + (begin+1) + ... + (end-1).
+      long tot = 0;
+      long expected = (begin+end-1)*(end-begin)*0.5;
+      // Each check entry receives expected-tot, which is 0 when the reduction is correct.
+      Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end)
+                          , [&]( int i, long &res) { res += parfor_result[i]; }
+                          , tot);
+      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
+                          , [&]( int i ) { parreduce_check[i] = expected-tot ; }
+                          );
+
+      // test parallel_reduce with join
+
+      tot = 0;
+      Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end)
+                          , [&]( int i, long &res) { res += parfor_result[i]; }
+                          , [&]( long& val1, const long& val2) { val1 += val2; }
+                          , tot);
+      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
+                          , [&]( int i ) { parreduce_check[i] += expected-tot ; }
+                          );
+
+      // test parallel_scan
+      // NOTE(review): rank 0 reads parscan_result after each scan without a barrier - confirm sync.
+      // Exclusive scan
+      Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end)
+                          , [&]( int i, long &val , const bool final ) {
+                              if ( final ) { parscan_result[i] = val; }
+                              val += i;
+                            }
+                          );
+      if ( member.team_rank() == 0 ) {
+        for ( long i = begin ; i < end ; ++i ) {
+          parscan_check[i] = (i*(i-1)-begin*(begin-1))*0.5-parscan_result[i];
+        }
+      }
+
+      // Inclusive scan
+      Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end)
+                          , [&]( int i, long &val , const bool final ) {
+                              val += i;
+                              if ( final ) { parscan_result[i] = val; }
+                            }
+                          );
+      if ( member.team_rank() == 0 ) {
+        for ( long i = begin ; i < end ; ++i ) {
+          parscan_check[i] += (i*(i+1)-begin*(begin-1))*0.5-parscan_result[i];
+        }
+      }
+      // ThreadVectorRange check
+      /*
+      long result = 0;
+      expected = (begin+end-1)*(end-begin)*0.5;
+      Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , 0 , 1 )
+                             , [&] ( const int i , long & outerUpdate ) {
+                                 long sum_j = 0.0;
+                                 Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( member , end - begin )
+                                                        , [&] ( const int j , long &innerUpdate ) {
+                                                            innerUpdate += begin+j;
+                                                          } , sum_j );
+                                 outerUpdate += sum_j ;
+                               } , result );
+      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
+                          , [&]( int i ) {
+                              parreduce_check[i] += result-expected ;
+                            }
+                          );
+      */
+    }
+  // Host driver: runs the task chain over indices 0..n; mismatches are only printed to std::cerr.
+  static void run( long n )
+    {
+      // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop
+      // const unsigned memory_capacity = 100000 ; // fails with SPAN=1 for serial and OMP
+      const unsigned memory_capacity = 400000 ;
+
+      policy_type root_policy( typename policy_type::memory_space()
+                        , memory_capacity );
+
+      view_type   root_parfor_result("parfor_result",n+1);
+      view_type   root_parreduce_check("parreduce_check",n+1);
+      view_type   root_parscan_result("parscan_result",n+1);
+      view_type   root_parscan_check("parscan_check",n+1);
+
+      typename view_type::HostMirror
+        host_parfor_result = Kokkos::create_mirror_view( root_parfor_result );
+      typename view_type::HostMirror
+        host_parreduce_check = Kokkos::create_mirror_view( root_parreduce_check );
+      typename view_type::HostMirror
+        host_parscan_result = Kokkos::create_mirror_view( root_parscan_result );
+      typename view_type::HostMirror
+        host_parscan_check = Kokkos::create_mirror_view( root_parscan_check );
+
+      future_type f = root_policy.host_spawn(
+                        TestTaskTeam( root_policy ,
+                                      root_parfor_result ,
+                                      root_parreduce_check ,
+                                      root_parscan_result,
+                                      root_parscan_check,
+                                      n ) ,
+                        Kokkos::TaskTeam );
+
+      Kokkos::wait( root_policy );
+
+      Kokkos::deep_copy( host_parfor_result , root_parfor_result );
+      Kokkos::deep_copy( host_parreduce_check , root_parreduce_check );
+      Kokkos::deep_copy( host_parscan_result , root_parscan_result );
+      Kokkos::deep_copy( host_parscan_check , root_parscan_check );
+      // Expected: parfor_result(i) == i and both check views zero at every index.
+      for ( long i = 0 ; i <= n ; ++i ) {
+        const long answer = i ;
+        if ( host_parfor_result(i) != answer ) {
+          std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = "
+                    << host_parfor_result(i) << " != " << answer << std::endl ;
+        }
+        if ( host_parreduce_check(i) != 0 ) {
+          std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = "
+                    << host_parreduce_check(i) << " != 0" << std::endl ;
+        }
+        if ( host_parscan_check(i) != 0 ) {
+          std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = "
+                    << host_parscan_check(i) << " != 0" << std::endl ;
+        }
+      }
+    }
+};
+// Team task functor that fills result[i] = i+1 in SPAN-sized pieces and returns result[nvalue].
+template< class ExecSpace >
+struct TestTaskTeamValue {
+
+  enum { SPAN = 8 };
+
+  typedef long value_type ;
+  typedef Kokkos::TaskScheduler<ExecSpace>         policy_type ;
+  typedef Kokkos::Future<value_type,ExecSpace>  future_type ;
+  typedef Kokkos::View<long*,ExecSpace>         view_type ;
+
+  policy_type  policy ;
+  future_type  future ;
+
+  view_type  result ;
+  const long nvalue ;
+
+  KOKKOS_INLINE_FUNCTION
+  TestTaskTeamValue( const policy_type & arg_policy
+                   , const view_type   & arg_result
+                   , const long          arg_nvalue )
+    : policy(arg_policy)
+    , future()
+    , result( arg_result )
+    , nvalue( arg_nvalue )
+    {}
+  // Task body: handles indices [begin,end), where end = nvalue+1 and begin = max(end-SPAN,0).
+  KOKKOS_INLINE_FUNCTION
+  void operator()( typename policy_type::member_type const & member
+                 , value_type & final )
+    {
+      const long end   = nvalue + 1 ;
+      const long begin = 0 < end - SPAN ? end - SPAN : 0 ;
+      // Indices below 'begin' remain and no child exists yet: rank 0 spawns one for nvalue = begin-1.
+      if ( 0 < begin && future.is_null() ) {
+        if ( member.team_rank() == 0 ) {
+
+          future = policy.task_spawn
+            ( TestTaskTeamValue( policy , result , begin - 1 )
+            , Kokkos::TaskTeam );
+
+          assert( ! future.is_null() );
+
+          policy.respawn( this , future );
+        }
+        return ;
+      }
+
+      Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end)
+                          , [&]( int i ) { result[i] = i + 1 ; }
+                          );
+      // NOTE(review): rank 0 reads result[nvalue] with no explicit barrier after the loop - confirm sync.
+      if ( member.team_rank() == 0 ) {
+        final = result[nvalue] ;
+      }
+
+      Kokkos::memory_fence();
+    }
+  // Host driver: runs the chain for indices 0..n; a wrong future value or entry is only printed to std::cerr.
+  static void run( long n )
+    {
+      // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop
+      const unsigned memory_capacity = 100000 ;
+
+      policy_type root_policy( typename policy_type::memory_space()
+                             , memory_capacity );
+
+      view_type   root_result("result",n+1);
+
+      typename view_type::HostMirror
+        host_result = Kokkos::create_mirror_view( root_result );
+
+      future_type fv = root_policy.host_spawn
+        ( TestTaskTeamValue( root_policy, root_result, n ) , Kokkos::TaskTeam );
+
+      Kokkos::wait( root_policy );
+
+      Kokkos::deep_copy( host_result , root_result );
+
+      if ( fv.get() != n + 1 ) {
+        std::cerr << "TestTaskTeamValue ERROR future = "
+                  << fv.get() << " != " << n + 1 << std::endl ;
+      }
+      for ( long i = 0 ; i <= n ; ++i ) {
+        const long answer = i + 1 ;
+        if ( host_result(i) != answer ) {
+          std::cerr << "TestTaskTeamValue ERROR result(" << i << ") = "
+                    << host_result(i) << " != " << answer << std::endl ;
+        }
+      }
+    }
+};
+} // namespace TestTaskScheduler
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
+#endif /* #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP */
+
+
diff --git a/lib/kokkos/core/unit_test/TestTeam.hpp b/lib/kokkos/core/unit_test/TestTeam.hpp
index db6b0cff7e21654f7ba17b531e63fbc63deb2b06..23ad2be3f053f522d6d8b9308ba881711dd7d63a 100644
--- a/lib/kokkos/core/unit_test/TestTeam.hpp
+++ b/lib/kokkos/core/unit_test/TestTeam.hpp
@@ -376,9 +376,14 @@ public:
   void run_test( const size_t nteam )
   {
     typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged >  result_type ;
-
     const unsigned REPEAT = 100000 ;
-    const unsigned Repeat = ( REPEAT + nteam - 1 ) / nteam ;
+    unsigned Repeat;
+    if ( nteam == 0 )
+    {
+      Repeat = 1;
+    } else {
+      Repeat = ( REPEAT + nteam - 1 ) / nteam ; //error here
+    }
 
     functor_type functor ;
 
@@ -581,7 +586,7 @@ struct ScratchTeamFunctor {
   KOKKOS_INLINE_FUNCTION
   void operator()( const typename policy_type::member_type & ind , value_type & update ) const
   {
-    const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 2*ind.team_size() );
+    const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 3*ind.team_size() );
     const shared_int_array_type scratch_A( ind.team_scratch(1) , SHARED_TEAM_COUNT );
     const shared_int_array_type scratch_B( ind.thread_scratch(1) , SHARED_THREAD_COUNT );
 
@@ -645,7 +650,7 @@ struct TestScratchTeam {
     typename Functor::value_type error_count = 0 ;
 
     int team_scratch_size   = Functor::shared_int_array_type::shmem_size(Functor::SHARED_TEAM_COUNT) +
-                              Functor::shared_int_array_type::shmem_size(2*team_size);
+                              Functor::shared_int_array_type::shmem_size(3*team_size);
     int thread_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_THREAD_COUNT);
     Kokkos::parallel_reduce( team_exec.set_scratch_size(0,Kokkos::PerTeam(team_scratch_size),
                                                           Kokkos::PerThread(thread_scratch_size)) ,
@@ -660,76 +665,84 @@ namespace Test {
 template< class ExecSpace>
 KOKKOS_INLINE_FUNCTION
 int test_team_mulit_level_scratch_loop_body(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) {
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team1(team.team_scratch(0),128);
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread1(team.thread_scratch(0),16);
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team2(team.team_scratch(0),128);
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread2(team.thread_scratch(0),16);
-
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team1(team.team_scratch(1),128000);
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread1(team.thread_scratch(1),16000);
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team2(team.team_scratch(1),128000);
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread2(team.thread_scratch(1),16000);
-
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team3(team.team_scratch(0),128);
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread3(team.thread_scratch(0),16);
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team3(team.team_scratch(1),128000);
-      Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread3(team.thread_scratch(1),16000);
-
-
-      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) {
-        a_team1(i) = 1000000 + i;
-        a_team2(i) = 2000000 + i;
-        a_team3(i) = 3000000 + i;
-      });
-      team.team_barrier();
-      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i){
-        a_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i;
-        a_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i;
-        a_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i;
-      });
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team1(team.team_scratch(0),128);
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread1(team.thread_scratch(0),16);
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team2(team.team_scratch(0),128);
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread2(team.thread_scratch(0),16);
+
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team1(team.team_scratch(1),128000);
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread1(team.thread_scratch(1),16000);
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team2(team.team_scratch(1),128000);
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread2(team.thread_scratch(1),16000);
+
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team3(team.team_scratch(0),128);
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread3(team.thread_scratch(0),16);
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team3(team.team_scratch(1),128000);
+  Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread3(team.thread_scratch(1),16000);
+
+  // The explicit types for 0 and 128 are here to test TeamThreadRange accepting different
+  // types for begin and end.
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,int(0),unsigned(128)), [&] (const int& i)
+  {
+    a_team1(i) = 1000000 + i;
+    a_team2(i) = 2000000 + i;
+    a_team3(i) = 3000000 + i;
+  });
+  team.team_barrier();
+  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i)
+  {
+    a_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i;
+    a_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i;
+    a_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i;
+  });
 
-      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) {
-        b_team1(i) = 1000000 + i;
-        b_team2(i) = 2000000 + i;
-        b_team3(i) = 3000000 + i;
-      });
-      team.team_barrier();
-      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i){
-        b_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i;
-        b_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i;
-        b_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i;
-      });
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i)
+  {
+    b_team1(i) = 1000000 + i;
+    b_team2(i) = 2000000 + i;
+    b_team3(i) = 3000000 + i;
+  });
+  team.team_barrier();
+  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i)
+  {
+    b_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i;
+    b_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i;
+    b_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i;
+  });
 
-      team.team_barrier();
-      int error = 0;
-      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) {
-        if(a_team1(i) != 1000000 + i) error++;
-        if(a_team2(i) != 2000000 + i) error++;
-        if(a_team3(i) != 3000000 + i) error++;
-      });
-      team.team_barrier();
-      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i){
-        if(a_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++;
-        if(a_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++;
-        if(a_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++;
-      });
+  team.team_barrier();
+  int error = 0;
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i)
+  {
+    if(a_team1(i) != 1000000 + i) error++;
+    if(a_team2(i) != 2000000 + i) error++;
+    if(a_team3(i) != 3000000 + i) error++;
+  });
+  team.team_barrier();
+  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i)
+  {
+    if(a_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++;
+    if(a_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++;
+    if(a_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++;
+  });
 
-      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) {
-        if(b_team1(i) != 1000000 + i) error++;
-        if(b_team2(i) != 2000000 + i) error++;
-        if(b_team3(i) != 3000000 + i) error++;
-      });
-      team.team_barrier();
-      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i){
-        if(b_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++;
-        if(b_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++;
-        if( b_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++;
-      });
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i)
+  {
+    if(b_team1(i) != 1000000 + i) error++;
+    if(b_team2(i) != 2000000 + i) error++;
+    if(b_team3(i) != 3000000 + i) error++;
+  });
+  team.team_barrier();
+  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i)
+  {
+    if(b_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++;
+    if(b_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++;
+    if( b_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++;
+  });
 
   return error;
 }
 
-
 struct TagReduce {};
 struct TagFor {};
 
diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp
index 48187f036844ccfda2d186f245b1673c7ffe5fd4..d9b06c29e49d0362226168861b0d5e818d1d82f9 100644
--- a/lib/kokkos/core/unit_test/TestTeamVector.hpp
+++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp
@@ -173,13 +173,15 @@ struct functor_team_for {
 
       // Accumulate value into per thread shared memory
       // This is non blocking
-      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) {
+      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i)
+      {
         values(team.team_rank ()) += i - team.league_rank () + team.league_size () + team.team_size ();
       });
       // Wait for all memory to be written
       team.team_barrier ();
       // One thread per team executes the comparison
-      Kokkos::single(Kokkos::PerTeam(team),[&]() {
+      Kokkos::single(Kokkos::PerTeam(team),[&]()
+      {
             Scalar test = 0;
             Scalar value = 0;
             for (int i = 0; i < 131; ++i) {
@@ -213,12 +215,14 @@ struct functor_team_reduce {
   void operator() (typename policy_type::member_type team) const {
 
     Scalar value = Scalar();
-    Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) {
+    Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val)
+    {
       val += i - team.league_rank () + team.league_size () + team.team_size ();
     },value);
 
     team.team_barrier ();
-    Kokkos::single(Kokkos::PerTeam(team),[&]() {
+    Kokkos::single(Kokkos::PerTeam(team),[&]()
+        {
          Scalar test = 0;
          for (int i = 0; i < 131; ++i) {
            test += i - team.league_rank () + team.league_size () + team.team_size ();
@@ -250,15 +254,18 @@ struct functor_team_reduce_join {
     Scalar value = 0;
 
     Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131)
-      , [&] (int i, Scalar& val) {
+      , [&] (int i, Scalar& val)
+      {
         val += i - team.league_rank () + team.league_size () + team.team_size ();
       }
-      , [&] (volatile Scalar& val, const volatile Scalar& src) {val+=src;}
+      , [&] (volatile Scalar& val, const volatile Scalar& src)
+        {val+=src;}
       , value
     );
 
     team.team_barrier ();
-    Kokkos::single(Kokkos::PerTeam(team),[&]() {
+    Kokkos::single(Kokkos::PerTeam(team),[&]()
+    {
          Scalar test = 0;
          for (int i = 0; i < 131; ++i) {
            test += i - team.league_rank () + team.league_size () + team.team_size ();
@@ -298,18 +305,22 @@ struct functor_team_vector_for {
               static_cast<unsigned int> (shmemSize));
     }
     else {
-      Kokkos::single(Kokkos::PerThread(team),[&] () {
+      Kokkos::single(Kokkos::PerThread(team),[&] ()
+      {
         values(team.team_rank ()) = 0;
       });
 
-      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) {
-        Kokkos::single(Kokkos::PerThread(team),[&] () {
+      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i)
+      {
+        Kokkos::single(Kokkos::PerThread(team),[&] ()
+        {
           values(team.team_rank ()) += i - team.league_rank () + team.league_size () + team.team_size ();
         });
       });
 
       team.team_barrier ();
-      Kokkos::single(Kokkos::PerTeam(team),[&]() {
+      Kokkos::single(Kokkos::PerTeam(team),[&]()
+      {
         Scalar test = 0;
         Scalar value = 0;
         for (int i = 0; i < 131; ++i) {
@@ -343,12 +354,14 @@ struct functor_team_vector_reduce {
   void operator() (typename policy_type::member_type team) const {
 
     Scalar value = Scalar();
-    Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) {
+    Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val)
+    {
         val += i - team.league_rank () + team.league_size () + team.team_size ();
     },value);
 
     team.team_barrier ();
-    Kokkos::single(Kokkos::PerTeam(team),[&]() {
+    Kokkos::single(Kokkos::PerTeam(team),[&]()
+    {
       Scalar test = 0;
       for (int i = 0; i < 131; ++i) {
         test += i - team.league_rank () + team.league_size () + team.team_size ();
@@ -379,15 +392,18 @@ struct functor_team_vector_reduce_join {
 
     Scalar value = 0;
     Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131)
-      , [&] (int i, Scalar& val) {
+      , [&] (int i, Scalar& val)
+      {
         val += i - team.league_rank () + team.league_size () + team.team_size ();
       }
-      , [&] (volatile Scalar& val, const volatile Scalar& src) {val+=src;}
+      , [&] (volatile Scalar& val, const volatile Scalar& src)
+        {val+=src;}
       , value
     );
 
     team.team_barrier ();
-    Kokkos::single(Kokkos::PerTeam(team),[&]() {
+    Kokkos::single(Kokkos::PerTeam(team),[&]()
+    {
       Scalar test = 0;
       for (int i = 0; i < 131; ++i) {
          test += i - team.league_rank () + team.league_size () + team.team_size ();
@@ -418,16 +434,19 @@ struct functor_vec_single {
     // inside a parallel_for and write to it.
     Scalar value = 0;
 
-    Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13),[&] (int i) {
+    Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13),[&] (int i)
+    {
       value = i; // This write is violating Kokkos semantics for nested parallelism
     });
 
-    Kokkos::single(Kokkos::PerThread(team),[&] (Scalar& val) {
+    Kokkos::single(Kokkos::PerThread(team),[&] (Scalar& val)
+    {
       val = 1;
     },value);
 
     Scalar value2 = 0;
-    Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13), [&] (int i, Scalar& val) {
+    Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13), [&] (int i, Scalar& val)
+    {
       val += value;
     },value2);
 
@@ -462,11 +481,13 @@ struct functor_vec_for {
       flag() = 1;
     }
     else {
-      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13), [&] (int i) {
+      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13), [&] (int i)
+      {
         values(13*team.team_rank() + i) = i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size();
       });
 
-      Kokkos::single(Kokkos::PerThread(team),[&] () {
+      Kokkos::single(Kokkos::PerThread(team),[&] ()
+      {
         Scalar test = 0;
         Scalar value = 0;
         for (int i = 0; i < 13; ++i) {
@@ -496,11 +517,13 @@ struct functor_vec_red {
   void operator() (typename policy_type::member_type team) const {
     Scalar value = 0;
 
-    Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val) {
+    Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val)
+    {
       val += i;
     }, value);
 
-    Kokkos::single(Kokkos::PerThread(team),[&] () {
+    Kokkos::single(Kokkos::PerThread(team),[&] ()
+    {
       Scalar test = 0;
       for(int i = 0; i < 13; i++) {
         test+=i;
@@ -526,12 +549,15 @@ struct functor_vec_red_join {
     Scalar value = 1;
 
     Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13)
-      , [&] (int i, Scalar& val) { val *= i; }
-      , [&] (Scalar& val, const Scalar& src) {val*=src;}
+      , [&] (int i, Scalar& val)
+      { val *= i; }
+      , [&] (Scalar& val, const Scalar& src)
+      {val*=src;}
       , value
     );
 
-    Kokkos::single(Kokkos::PerThread(team),[&] () {
+    Kokkos::single(Kokkos::PerThread(team),[&] ()
+    {
       Scalar test = 1;
       for(int i = 0; i < 13; i++) {
         test*=i;
@@ -554,7 +580,8 @@ struct functor_vec_scan {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (typename policy_type::member_type team) const {
-    Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val, bool final) {
+    Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val, bool final)
+    {
       val += i;
       if(final) {
         Scalar test = 0;
diff --git a/lib/kokkos/core/unit_test/TestThreads.cpp b/lib/kokkos/core/unit_test/TestThreads.cpp
deleted file mode 100644
index 93049b95dd7c75bcd88b8d6408e8a0249f905855..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/unit_test/TestThreads.cpp
+++ /dev/null
@@ -1,614 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-//
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-//
-// ************************************************************************
-//@HEADER
-*/
-
-#include <gtest/gtest.h>
-
-#include <Kokkos_Macros.hpp>
-
-#if defined( KOKKOS_HAVE_PTHREAD )
-#ifdef KOKKOS_LAMBDA
-#undef KOKKOS_LAMBDA
-#endif
-#define KOKKOS_LAMBDA [=]
-
-#include <Kokkos_Core.hpp>
-
-#include <Threads/Kokkos_Threads_TaskPolicy.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestViewImpl.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-#include <TestAtomic.hpp>
-#include <TestAtomicOperations.hpp>
-
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestTaskPolicy.hpp>
-#include <TestMemoryPool.hpp>
-
-
-#include <TestCXX11.hpp>
-#include <TestCXX11Deduction.hpp>
-#include <TestTeamVector.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-
-
-#include <TestPolicyConstruction.hpp>
-
-#include <TestMDRange.hpp>
-
-namespace Test {
-
-class threads : public ::testing::Test {
-protected:
-  static void SetUpTestCase()
-  {
-    // Finalize without initialize is a no-op:
-    Kokkos::Threads::finalize();
-
-    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
-    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
-    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
-
-    unsigned threads_count = 0 ;
-
-    // Initialize and finalize with no threads:
-    Kokkos::Threads::initialize( 1u );
-    Kokkos::Threads::finalize();
-
-    threads_count = std::max( 1u , numa_count )
-                  * std::max( 2u , cores_per_numa * threads_per_core );
-
-    Kokkos::Threads::initialize( threads_count );
-    Kokkos::Threads::finalize();
-
-    threads_count = std::max( 1u , numa_count * 2 )
-                  * std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 );
-
-    Kokkos::Threads::initialize( threads_count );
-    Kokkos::Threads::finalize();
-
-    // Quick attempt to verify thread start/terminate don't have race condition:
-    threads_count = std::max( 1u , numa_count )
-                  * std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 );
-    for ( unsigned i = 0 ; i < 10 ; ++i ) {
-      Kokkos::Threads::initialize( threads_count );
-      Kokkos::Threads::sleep();
-      Kokkos::Threads::wake();
-      Kokkos::Threads::finalize();
-    }
-
-    Kokkos::Threads::initialize( threads_count );
-    Kokkos::Threads::print_configuration( std::cout , true /* detailed */ );
-  }
-
-  static void TearDownTestCase()
-  {
-    Kokkos::Threads::finalize();
-  }
-};
-
-TEST_F( threads , init ) {
-  ;
-}
-
-TEST_F( threads , md_range ) {
-  TestMDRange_2D< Kokkos::Threads >::test_for2(100,100);
-
-  TestMDRange_3D< Kokkos::Threads >::test_for3(100,100,100);
-}
-
-TEST_F( threads , dispatch )
-{
-  const int repeat = 100 ;
-  for ( int i = 0 ; i < repeat ; ++i ) {
-  for ( int j = 0 ; j < repeat ; ++j ) {
-    Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >(0,j)
-                        , KOKKOS_LAMBDA( int ) {} );
-  }}
-}
-
-TEST_F( threads , impl_shared_alloc ) {
-  test_shared_alloc< Kokkos::HostSpace , Kokkos::Threads >();
-}
-
-TEST_F( threads, policy_construction) {
-  TestRangePolicyConstruction< Kokkos::Threads >();
-  TestTeamPolicyConstruction< Kokkos::Threads >();
-}
-
-TEST_F( threads , impl_view_mapping ) {
-  test_view_mapping< Kokkos::Threads >();
-  test_view_mapping_subview< Kokkos::Threads >();
-  test_view_mapping_operator< Kokkos::Threads >();
-  TestViewMappingAtomic< Kokkos::Threads >::run();
-}
-
-
-TEST_F( threads, view_impl) {
-  test_view_impl< Kokkos::Threads >();
-}
-
-TEST_F( threads, view_api) {
-  TestViewAPI< double , Kokkos::Threads >();
-}
-
-TEST_F( threads , view_nested_view )
-{
-  ::Test::view_nested_view< Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_auto_1d_left ) {
-  TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_auto_1d_right ) {
-  TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_auto_1d_stride ) {
-  TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_assign_strided ) {
-  TestViewSubview::test_1d_strided_assignment< Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_left_0 ) {
-  TestViewSubview::test_left_0< Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_left_1 ) {
-  TestViewSubview::test_left_1< Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_left_2 ) {
-  TestViewSubview::test_left_2< Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_left_3 ) {
-  TestViewSubview::test_left_3< Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_right_0 ) {
-  TestViewSubview::test_right_0< Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_right_1 ) {
-  TestViewSubview::test_right_1< Kokkos::Threads >();
-}
-
-TEST_F( threads, view_subview_right_3 ) {
-  TestViewSubview::test_right_3< Kokkos::Threads >();
-}
-
-
-TEST_F( threads, view_aggregate ) {
-  TestViewAggregate< Kokkos::Threads >();
-  TestViewAggregateReduction< Kokkos::Threads >();
-}
-
-TEST_F( threads , range_tag )
-{
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(2);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(2);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001);
-  TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000);
-}
-
-TEST_F( threads , team_tag )
-{
-  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2);
-  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2);
-  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2);
-  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2);
-  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
-  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
-  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000);
-  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000);
-}
-
-TEST_F( threads, long_reduce) {
-  TestReduce< long ,   Kokkos::Threads >( 1000000 );
-}
-
-TEST_F( threads, double_reduce) {
-  TestReduce< double ,   Kokkos::Threads >( 1000000 );
-}
-
-TEST_F( threads , reducers )
-{
-  TestReducers<int, Kokkos::Threads>::execute_integer();
-  TestReducers<size_t, Kokkos::Threads>::execute_integer();
-  TestReducers<double, Kokkos::Threads>::execute_float();
-  TestReducers<Kokkos::complex<double>, Kokkos::Threads>::execute_basic();
-}
-
-TEST_F( threads, team_long_reduce) {
-  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 );
-  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
-  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 );
-  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
-}
-
-TEST_F( threads, team_double_reduce) {
-  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 );
-  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
-  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 );
-  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
-}
-
-TEST_F( threads, long_reduce_dynamic ) {
-  TestReduceDynamic< long ,   Kokkos::Threads >( 1000000 );
-}
-
-TEST_F( threads, double_reduce_dynamic ) {
-  TestReduceDynamic< double ,   Kokkos::Threads >( 1000000 );
-}
-
-TEST_F( threads, long_reduce_dynamic_view ) {
-  TestReduceDynamicView< long ,   Kokkos::Threads >( 1000000 );
-}
-
-TEST_F( threads, team_shared_request) {
-  TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >();
-  TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >();
-}
-
-#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
-TEST_F( threads, team_lambda_shared_request) {
-  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >();
-  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >();
-}
-#endif
-
-TEST_F( threads, shmem_size) {
-  TestShmemSize< Kokkos::Threads >();
-}
-
-TEST_F( threads , view_remap )
-{
-  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
-
-  typedef Kokkos::View< double*[N1][N2][N3] ,
-                             Kokkos::LayoutRight ,
-                             Kokkos::Threads > output_type ;
-
-  typedef Kokkos::View< int**[N2][N3] ,
-                             Kokkos::LayoutLeft ,
-                             Kokkos::Threads > input_type ;
-
-  typedef Kokkos::View< int*[N0][N2][N3] ,
-                             Kokkos::LayoutLeft ,
-                             Kokkos::Threads > diff_type ;
-
-  output_type output( "output" , N0 );
-  input_type  input ( "input" , N0 , N1 );
-  diff_type   diff  ( "diff" , N0 );
-
-  int value = 0 ;
-  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
-  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
-  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
-  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
-    input(i0,i1,i2,i3) = ++value ;
-  }}}}
-
-  // Kokkos::deep_copy( diff , input ); // throw with incompatible shape
-  Kokkos::deep_copy( output , input );
-
-  value = 0 ;
-  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
-  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
-  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
-  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
-    ++value ;
-    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
-  }}}}
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( threads , atomics )
-{
-  const int loop_count = 1e6 ;
-
-  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,3) ) );
-
-  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,1) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,2) ) );
-  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,3) ) );
-}
-
-TEST_F( threads , atomic_operations )
-{
-  const int start = 1; //Avoid zero for division
-  const int end = 11;
-  for (int i = start; i < end; ++i)
-  {
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 4 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 5 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 6 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 7 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 8 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 9 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 4 ) ) );
-
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 1 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 2 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 3 ) ) );
-    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 4 ) ) );
-  }
-
-}
-
-//----------------------------------------------------------------------------
-
-#if 0
-TEST_F( threads , scan_small )
-{
-  typedef TestScan< Kokkos::Threads , Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor ;
-  for ( int i = 0 ; i < 1000 ; ++i ) {
-    TestScanFunctor( 10 );
-    TestScanFunctor( 10000 );
-  }
-  TestScanFunctor( 1000000 );
-  TestScanFunctor( 10000000 );
-
-  Kokkos::Threads::fence();
-}
-#endif
-
-TEST_F( threads , scan )
-{
-  TestScan< Kokkos::Threads >::test_range( 1 , 1000 );
-  TestScan< Kokkos::Threads >( 1000000 );
-  TestScan< Kokkos::Threads >( 10000000 );
-  Kokkos::Threads::fence();
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( threads , team_scan )
-{
-  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10 );
-  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
-  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10000 );
-  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( threads , compiler_macros )
-{
-  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Threads >() ) );
-}
-
-TEST_F( threads , memory_space )
-{
-  TestMemorySpace< Kokkos::Threads >();
-}
-
-TEST_F( threads , memory_pool )
-{
-  bool val = TestMemoryPool::test_mempool< Kokkos::Threads >( 128, 128000000 );
-  ASSERT_TRUE( val );
-
-  TestMemoryPool::test_mempool2< Kokkos::Threads >( 64, 4, 1000000, 2000000 );
-
-  TestMemoryPool::test_memory_exhaustion< Kokkos::Threads >();
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( threads , template_meta_functions )
-{
-  TestTemplateMetaFunctions<int, Kokkos::Threads >();
-}
-
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
-TEST_F( threads , cxx11 )
-{
-  if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Threads >::value ) {
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(1) ) );
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(2) ) );
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(3) ) );
-    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(4) ) );
-  }
-}
-
-TEST_F( threads , reduction_deduction )
-{
-  TestCXX11::test_reduction_deduction< Kokkos::Threads >();
-}
-#endif /* #if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) */
-
-TEST_F( threads , team_vector )
-{
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(0) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(1) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(2) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(3) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(4) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(5) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(6) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(7) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(8) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(9) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(10) ) );
-}
-
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
-
-TEST_F( threads , task_policy )
-{
-  TestTaskPolicy::test_task_dep< Kokkos::Threads >( 10 );
-
-  for ( long i = 0 ; i < 25 ; ++i ) {
-//    printf( "test_fib():  %2ld\n", i );
-    TestTaskPolicy::test_fib< Kokkos::Threads >(i);
-  }
-  for ( long i = 0 ; i < 35 ; ++i ) {
-//    printf( "test_fib2(): %2ld\n", i );
-    TestTaskPolicy::test_fib2< Kokkos::Threads >(i);
-  }
-}
-
-TEST_F( threads , task_team )
-{
-  TestTaskPolicy::test_task_team< Kokkos::Threads >(1000);
-}
-
-TEST_F( threads , task_latch )
-{
-  TestTaskPolicy::test_latch< Kokkos::Threads >(10);
-  TestTaskPolicy::test_latch< Kokkos::Threads >(1000);
-}
-
-#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
-
-} // namespace Test
-
-#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
diff --git a/lib/kokkos/core/unit_test/TestTile.hpp b/lib/kokkos/core/unit_test/TestTile.hpp
index dfb2bd81b3dec3485688f9827d3f1f7ad24ddb9d..842131debb69b54ad08fd0eb90836510be50d7ca 100644
--- a/lib/kokkos/core/unit_test/TestTile.hpp
+++ b/lib/kokkos/core/unit_test/TestTile.hpp
@@ -43,6 +43,7 @@
 #define TEST_TILE_HPP
 
 #include <Kokkos_Core.hpp>
+#include <impl/Kokkos_ViewTile.hpp>
 
 namespace TestTile {
 
@@ -102,7 +103,7 @@ struct ReduceTileErrors
 
     if ( jtile < tile_dim1 ) {
 
-      tile_type tile = Kokkos::tile_subview( m_array , itile , jtile );
+      tile_type tile = Kokkos::Experimental::tile_subview( m_array , itile , jtile );
 
       if ( tile(0,0) != ptrdiff_t(( itile + jtile * tile_dim0 ) * TileLayout::N0 * TileLayout::N1 ) ) {
         ++errors ;
diff --git a/lib/kokkos/core/unit_test/TestUtilities.hpp b/lib/kokkos/core/unit_test/TestUtilities.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..947be03e399bee3c23f4c4f333c34c0e6a9d4d08
--- /dev/null
+++ b/lib/kokkos/core/unit_test/TestUtilities.hpp
@@ -0,0 +1,306 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+
+#include <stdexcept>
+#include <sstream>
+#include <iostream>
+
+#include <Kokkos_Core.hpp>
+
+/*--------------------------------------------------------------------------*/
+
+namespace Test {
+
+inline
+void test_utilities()  // Compile-time-only checks of the integer_sequence utilities; the body is nothing but static_asserts.
+{
+  using namespace Kokkos::Impl;  // integer_sequence, make_integer_sequence, at, etc. are used unqualified below
+  {  // Length 0: make_integer_sequence<int,0> must be the empty sequence.
+    using i = integer_sequence<int>;
+    using j = make_integer_sequence<int,0>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 0u, "Error: integer_sequence.size()" );
+  }
+
+
+  {  // Length 1: also checks element lookup by type (integer_sequence_at) and by call (at) in a constant expression.
+    using i = integer_sequence<int,0>;
+    using j = make_integer_sequence<int,1>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 1u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+  }
+
+
+  {  // Length 2: same checks, every index K must map to value K.
+    using i = integer_sequence<int,0,1>;
+    using j = make_integer_sequence<int,2>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 2u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" );
+  }
+
+  {  // Length 3.
+    using i = integer_sequence<int,0,1,2>;
+    using j = make_integer_sequence<int,3>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 3u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" );
+  }
+
+  {  // Length 4.
+    using i = integer_sequence<int,0,1,2,3>;
+    using j = make_integer_sequence<int,4>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 4u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" );
+  }
+
+  {  // Length 5.
+    using i = integer_sequence<int,0,1,2,3,4>;
+    using j = make_integer_sequence<int,5>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 5u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" );
+  }
+
+  {  // Length 6.
+    using i = integer_sequence<int,0,1,2,3,4,5>;
+    using j = make_integer_sequence<int,6>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 6u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" );
+  }
+
+  {  // Length 7.
+    using i = integer_sequence<int,0,1,2,3,4,5,6>;
+    using j = make_integer_sequence<int,7>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 7u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" );
+  }
+
+  {  // Length 8.
+    using i = integer_sequence<int,0,1,2,3,4,5,6,7>;
+    using j = make_integer_sequence<int,8>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 8u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" );
+  }
+
+  {  // Length 9.
+    using i = integer_sequence<int,0,1,2,3,4,5,6,7,8>;
+    using j = make_integer_sequence<int,9>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 9u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<8, i>::value == 8, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)" );
+  }
+
+  {  // Length 10: the longest sequence exercised by these checks.
+    using i = integer_sequence<int,0,1,2,3,4,5,6,7,8,9>;
+    using j = make_integer_sequence<int,10>;
+
+    static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" );
+    static_assert( i::size() == 10u, "Error: integer_sequence.size()" );
+
+    static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<8, i>::value == 8, "Error: integer_sequence_at" );
+    static_assert( integer_sequence_at<9, i>::value == 9, "Error: integer_sequence_at" );
+
+    static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)" );
+    static_assert( at(9, i{}) == 9, "Error: at(unsigned, integer_sequence)" );
+  }
+
+  {  // Reversal: 0,1,2,3,4 must become 4,3,2,1,0 (gr is the expected "golden" result).
+    using i = make_integer_sequence<int, 5>;
+    using r = reverse_integer_sequence<i>;
+    using gr = integer_sequence<int, 4, 3, 2, 1, 0>;
+
+    static_assert( std::is_same<r,gr>::value, "Error: reverse_integer_sequence" );
+  }
+
+  {  // Prefix sums over 0..9; ge / gi are the expected exclusive / inclusive results.
+    using s = make_integer_sequence<int,10>;
+    using e = exclusive_scan_integer_sequence<s>;
+    using i = inclusive_scan_integer_sequence<s>;
+
+    using ge = integer_sequence<int, 0, 0, 1, 3, 6, 10, 15, 21, 28, 36>;  // element k = sum of inputs before k
+    using gi = integer_sequence<int, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45>;  // element k = sum of inputs up to and including k
+
+    static_assert( e::value == 45, "Error: scan value");  // ::value is the full total 0+1+...+9 for both scan kinds
+    static_assert( i::value == 45, "Error: scan value");
+
+    static_assert( std::is_same< e::type, ge >::value, "Error: exclusive_scan");
+    static_assert( std::is_same< i::type, gi >::value, "Error: inclusive_scan");
+  }
+
+
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp
index ae4c6d2185d12bdf1f61ab66c73244e6b38bb50b..88b474db1de466d1cedfb19633b73eef28e0b943 100644
--- a/lib/kokkos/core/unit_test/TestViewAPI.hpp
+++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp
@@ -55,8 +55,6 @@
 
 namespace Test {
 
-#if KOKKOS_USING_EXP_VIEW
-
 template< class T , class ... P >
 size_t allocation_count( const Kokkos::View<T,P...> & view )
 {
@@ -68,19 +66,6 @@ size_t allocation_count( const Kokkos::View<T,P...> & view )
   return (card <= alloc && memory_span == 400) ? alloc : 0 ;
 }
 
-#else
-
-template< class T , class L , class D , class M , class S >
-size_t allocation_count( const Kokkos::View<T,L,D,M,S> & view )
-{
-  const size_t card  = Kokkos::Impl::cardinality_count( view.shape() );
-  const size_t alloc = view.capacity();
-
-  return card <= alloc ? alloc : 0 ;
-}
-
-#endif
-
 /*--------------------------------------------------------------------------*/
 
 template< typename T, class DeviceType>
@@ -657,7 +642,6 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 >
       if ( & right(i0,i1,i2) != & right_stride(i0,i1,i2) ) { update |= 8 ; }
     }
 
-#if KOKKOS_USING_EXP_VIEW
     for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 )
     for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 )
     for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 )
@@ -665,7 +649,6 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 >
       if ( & left(i0,i1,i2)  != & left(i0,i1,i2,0,0,0,0,0) )  { update |= 3 ; }
       if ( & right(i0,i1,i2) != & right(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; }
     }
-#endif
   }
 };
 
@@ -742,14 +725,12 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 2 >
       offset = j ;
     }
 
-#if KOKKOS_USING_EXP_VIEW
     for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 )
     for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 )
     {
       if ( & left(i0,i1)  != & left(i0,i1,0,0,0,0,0,0) )  { update |= 3 ; }
       if ( & right(i0,i1) != & right(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; }
     }
-#endif
   }
 };
 
@@ -813,10 +794,8 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 >
   {
     for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 )
     {
-#if KOKKOS_USING_EXP_VIEW
       if ( & left(i0)  != & left(i0,0,0,0,0,0,0,0) )  { update |= 3 ; }
       if ( & right(i0) != & right(i0,0,0,0,0,0,0,0) ) { update |= 3 ; }
-#endif
       if ( & left(i0)  != & left_stride(i0) ) { update |= 4 ; }
       if ( & right(i0) != & right_stride(i0) ) { update |= 8 ; }
     }
@@ -1021,18 +1000,10 @@ public:
     dx = dView4( "dx" , N0 );
     dy = dView4( "dy" , N0 );
 
-    #if KOKKOS_USING_EXP_VIEW
     ASSERT_EQ( dx.use_count() , size_t(1) );
-    #else
-    ASSERT_EQ( dx.tracker().ref_count() , size_t(1) );
-    #endif
 
     dView4_unmanaged unmanaged_dx = dx;
-    #if KOKKOS_USING_EXP_VIEW
     ASSERT_EQ( dx.use_count() , size_t(1) );
-    #else
-    ASSERT_EQ( dx.tracker().ref_count() , size_t(1) );
-    #endif
 
     dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged(dx.ptr_on_device(),
                                                               dx.dimension_0(),
@@ -1050,48 +1021,24 @@ public:
     }
 
     const_dView4 const_dx = dx ;
-    #if KOKKOS_USING_EXP_VIEW
     ASSERT_EQ( dx.use_count() , size_t(2) );
-    #else
-    ASSERT_EQ( dx.tracker().ref_count() , size_t(2) );
-    #endif
 
     {
       const_dView4 const_dx2;
       const_dx2 = const_dx;
-      #if KOKKOS_USING_EXP_VIEW
       ASSERT_EQ( dx.use_count() , size_t(3) );
-      #else
-      ASSERT_EQ( dx.tracker().ref_count() , size_t(3) );
-      #endif
 
       const_dx2 = dy;
-      #if KOKKOS_USING_EXP_VIEW
       ASSERT_EQ( dx.use_count() , size_t(2) );
-      #else
-      ASSERT_EQ( dx.tracker().ref_count() , size_t(2) );
-      #endif
 
       const_dView4 const_dx3(dx);
-      #if KOKKOS_USING_EXP_VIEW
       ASSERT_EQ( dx.use_count() , size_t(3) );
-      #else
-      ASSERT_EQ( dx.tracker().ref_count() , size_t(3) );
-      #endif
       
       dView4_unmanaged dx4_unmanaged(dx);
-      #if KOKKOS_USING_EXP_VIEW
       ASSERT_EQ( dx.use_count() , size_t(3) );
-      #else
-      ASSERT_EQ( dx.tracker().ref_count() , size_t(3) );
-      #endif
     }
 
-    #if KOKKOS_USING_EXP_VIEW
     ASSERT_EQ( dx.use_count() , size_t(2) );
-    #else
-    ASSERT_EQ( dx.tracker().ref_count() , size_t(2) );
-    #endif
 
 
     ASSERT_FALSE( dx.ptr_on_device() == 0 );
@@ -1120,7 +1067,6 @@ public:
     // T v2 = hx(0,0) ; // Generates compile error as intended
     // hx(0,0) = v2 ;   // Generates compile error as intended
 
-#if ! KOKKOS_USING_EXP_VIEW
     // Testing with asynchronous deep copy with respect to device
     {
       size_t count = 0 ;
@@ -1185,7 +1131,6 @@ public:
         { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); }
       }}}}
     }
-#endif /* #if ! KOKKOS_USING_EXP_VIEW */
 
     // Testing with synchronous deep copy
     {
diff --git a/lib/kokkos/core/unit_test/TestViewImpl.hpp b/lib/kokkos/core/unit_test/TestViewImpl.hpp
deleted file mode 100644
index c34ef759d1dd41bbb9238ccdb37f2aa28955af6d..0000000000000000000000000000000000000000
--- a/lib/kokkos/core/unit_test/TestViewImpl.hpp
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#include <gtest/gtest.h>
-
-#include <stdexcept>
-#include <sstream>
-#include <iostream>
-
-#include <Kokkos_Core.hpp>
-
-/*--------------------------------------------------------------------------*/
-
-#if KOKKOS_USING_EXP_VIEW
-
-namespace Test {
-
-template < class Device >
-void test_view_impl() {}
-
-}
-
-#else
-
-/*--------------------------------------------------------------------------*/
-
-namespace Test {
-
-struct DummyMemorySpace
-{
-  typedef DummyMemorySpace memory_space ;
-  typedef unsigned size_type ;
-};
-
-/*--------------------------------------------------------------------------*/
-
-template< class Type >
-struct DefineShape {
-  typedef typename Kokkos::Impl::AnalyzeShape<Type>::shape type ;
-};
-
-template< class Type >
-struct ExtractValueType {
-  typedef typename Kokkos::Impl::AnalyzeShape<Type>::value_type type ;
-};
-
-template< class Type >
-struct ArrayType { typedef Type type ; };
-
-template < class Device >
-void test_view_impl()
-{
-  //typedef typename Device::memory_space memory_space ; // unused
-
-  typedef ArrayType< int[100]                >::type type_01 ;
-  typedef ArrayType< int*                    >::type type_11 ;
-  typedef ArrayType< int[5][6][700]          >::type type_03 ;
-  typedef ArrayType< double*[8][9][900]      >::type type_14 ;
-  typedef ArrayType< long**                  >::type type_22 ;
-  typedef ArrayType< short **[5][6][7]       >::type type_25 ;
-  typedef ArrayType< const short **[5][6][7] >::type const_type_25 ;
-  typedef ArrayType< short***[5][6][7]       >::type type_36 ;
-  typedef ArrayType< const short***[5][6][7] >::type const_type_36 ;
-
-  // mfh 14 Feb 2014: With gcc 4.8.2 -Wall, this emits a warning:
-  //
-  // typedef ‘ok_const_25’ locally defined but not used [-Wunused-local-typedefs]
-  //
-  // It's unfortunate that this is the case, because the typedef is
-  // being used for a compile-time check!  We deal with this by
-  // declaring an instance of ok_const_25, and marking it with
-  // "(void)" so that instance doesn't emit an "unused variable"
-  // warning.
-  //
-  // typedef typename Kokkos::Impl::StaticAssertSame<
-  //    typename Kokkos::Impl::AnalyzeShape<type_25>::const_type ,
-  //    typename Kokkos::Impl::AnalyzeShape<const_type_25>::type
-  //      > ok_const_25 ;
-
-  typedef typename Kokkos::Impl::StaticAssertSame<
-    typename Kokkos::Impl::AnalyzeShape<type_25>::const_type,
-    typename Kokkos::Impl::AnalyzeShape<const_type_25>::type
-      > ok_const_25 ;
-
-  typedef typename Kokkos::Impl::StaticAssertSame<
-    typename Kokkos::Impl::AnalyzeShape<type_36>::const_type,
-    typename Kokkos::Impl::AnalyzeShape<const_type_36>::type
-      > ok_const_36 ;
-  {
-    ok_const_25 thing_25 ;
-    ok_const_36 thing_36 ;
-    (void) thing_25 ; // silence warning for unused variable
-    (void) thing_36 ; // silence warning for unused variable
-  }
-
-  ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_03>::type , int >::value ) );
-  ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_14>::type , double >::value ) );
-  ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_22>::type , long >::value ) );
-  ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_36>::type , short >::value ) );
-
-  ASSERT_FALSE( ( Kokkos::Impl::is_same< ExtractValueType<type_36>::type , int >::value ) );
-
-  typedef typename DefineShape< type_01 >::type  shape_01_type ;
-  typedef typename DefineShape< type_11 >::type  shape_11_type ;
-  typedef typename DefineShape< type_03 >::type  shape_03_type ;
-  typedef typename DefineShape< type_14 >::type  shape_14_type ;
-  typedef typename DefineShape< type_22 >::type  shape_22_type ;
-  typedef typename DefineShape< type_36 >::type  shape_36_type ;
-
-  ASSERT_TRUE( ( Kokkos::Impl::StaticAssert< shape_36_type::rank == 6 >::value ) );
-  ASSERT_TRUE( ( Kokkos::Impl::StaticAssert< shape_03_type::rank == 3 >::value ) );
-
-  shape_01_type shape_01 ; shape_01_type::assign( shape_01 );
-  shape_11_type shape_11 ; shape_11_type::assign( shape_11, 1000 );
-  shape_03_type shape_03 ; shape_03_type::assign( shape_03 );
-  shape_14_type shape_14 ; shape_14_type::assign( shape_14 , 0 );
-  shape_22_type shape_22 ; shape_22_type::assign( shape_22 , 0 , 0 );
-  shape_36_type shape_36 ; shape_36_type::assign( shape_36 , 10 , 20 , 30 );
-
-  ASSERT_TRUE( shape_01.rank_dynamic == 0u );
-  ASSERT_TRUE( shape_01.rank         == 1u );
-  ASSERT_TRUE( shape_01.N0           == 100u );
-
-  ASSERT_TRUE( shape_11.rank_dynamic == 1u );
-  ASSERT_TRUE( shape_11.rank         == 1u );
-  ASSERT_TRUE( shape_11.N0           == 1000u );
-
-  ASSERT_TRUE( shape_03.rank_dynamic == 0u );
-  ASSERT_TRUE( shape_03.rank         == 3u );
-  ASSERT_TRUE( shape_03.N0           == 5u );
-  ASSERT_TRUE( shape_03.N1           == 6u );
-  ASSERT_TRUE( shape_03.N2           == 700u );
-
-  ASSERT_TRUE( shape_14.rank_dynamic == 1u );
-  ASSERT_TRUE( shape_14.rank         == 4u );
-  ASSERT_TRUE( shape_14.N0           == 0u );
-  ASSERT_TRUE( shape_14.N1           == 8u );
-  ASSERT_TRUE( shape_14.N2           == 9u );
-  ASSERT_TRUE( shape_14.N3           == 900u );
-
-  ASSERT_TRUE( shape_22.rank_dynamic == 2u );
-  ASSERT_TRUE( shape_22.rank         == 2u );
-  ASSERT_TRUE( shape_22.N0           == 0u );
-  ASSERT_TRUE( shape_22.N1           == 0u );
-
-  ASSERT_TRUE( shape_36.rank_dynamic == 3u );
-  ASSERT_TRUE( shape_36.rank         == 6u );
-  ASSERT_TRUE( shape_36.N0           == 10u );
-  ASSERT_TRUE( shape_36.N1           == 20u );
-  ASSERT_TRUE( shape_36.N2           == 30u );
-  ASSERT_TRUE( shape_36.N3           == 5u  );
-  ASSERT_TRUE( shape_36.N4           == 6u  );
-  ASSERT_TRUE( shape_36.N5           == 7u  );
-
-
-  ASSERT_TRUE( shape_01 == shape_01 );
-  ASSERT_TRUE( shape_11 == shape_11 );
-  ASSERT_TRUE( shape_36 == shape_36 );
-  ASSERT_TRUE( shape_01 != shape_36 );
-  ASSERT_TRUE( shape_22 != shape_36 );
-
-  //------------------------------------------------------------------------
-
-  typedef Kokkos::Impl::ViewOffset< shape_01_type , Kokkos::LayoutLeft > shape_01_left_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_11_type , Kokkos::LayoutLeft > shape_11_left_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_03_type , Kokkos::LayoutLeft > shape_03_left_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_14_type , Kokkos::LayoutLeft > shape_14_left_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_22_type , Kokkos::LayoutLeft > shape_22_left_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_36_type , Kokkos::LayoutLeft > shape_36_left_offset ;
-
-  typedef Kokkos::Impl::ViewOffset< shape_01_type , Kokkos::LayoutRight > shape_01_right_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_11_type , Kokkos::LayoutRight > shape_11_right_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_03_type , Kokkos::LayoutRight > shape_03_right_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_14_type , Kokkos::LayoutRight > shape_14_right_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_22_type , Kokkos::LayoutRight > shape_22_right_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_36_type , Kokkos::LayoutRight > shape_36_right_offset ;
-
-  ASSERT_TRUE( ! shape_01_left_offset::has_padding );
-  ASSERT_TRUE( ! shape_11_left_offset::has_padding );
-  ASSERT_TRUE( ! shape_03_left_offset::has_padding );
-  ASSERT_TRUE(   shape_14_left_offset::has_padding );
-  ASSERT_TRUE(   shape_22_left_offset::has_padding );
-  ASSERT_TRUE(   shape_36_left_offset::has_padding );
-
-  ASSERT_TRUE( ! shape_01_right_offset::has_padding );
-  ASSERT_TRUE( ! shape_11_right_offset::has_padding );
-  ASSERT_TRUE( ! shape_03_right_offset::has_padding );
-  ASSERT_TRUE( ! shape_14_right_offset::has_padding );
-  ASSERT_TRUE(   shape_22_right_offset::has_padding );
-  ASSERT_TRUE(   shape_36_right_offset::has_padding );
-
-  //------------------------------------------------------------------------
-
-  typedef Kokkos::Impl::ViewOffset< shape_01_type , Kokkos::LayoutStride > shape_01_stride_offset ;
-  typedef Kokkos::Impl::ViewOffset< shape_36_type , Kokkos::LayoutStride > shape_36_stride_offset ;
-
-  {
-    shape_01_stride_offset stride_offset_01 ;
-
-    stride_offset_01.assign( 1, stride_offset_01.N0, 0,0,0,0,0,0,0 );
-
-    ASSERT_EQ( int(stride_offset_01.S[0]) , int(1) );
-    ASSERT_EQ( int(stride_offset_01.S[1]) , int(stride_offset_01.N0) );
-  }
-
-  {
-    shape_36_stride_offset stride_offset_36 ;
-
-    size_t str[7] ;
-    str[5] = 1 ;
-    str[4] = str[5] * stride_offset_36.N5 ;
-    str[3] = str[4] * stride_offset_36.N4 ;
-    str[2] = str[3] * stride_offset_36.N3 ;
-    str[1] = str[2] * 100 ;
-    str[0] = str[1] * 200 ;
-    str[6] = str[0] * 300 ;
-
-    stride_offset_36.assign( str[0] , str[1] , str[2] , str[3] , str[4] , str[5] , str[6] , 0 , 0 );
-
-    ASSERT_EQ( size_t(stride_offset_36.S[6]) , size_t(str[6]) );
-    ASSERT_EQ( size_t(stride_offset_36.N2)   , size_t(100) );
-    ASSERT_EQ( size_t(stride_offset_36.N1)   , size_t(200) );
-    ASSERT_EQ( size_t(stride_offset_36.N0)   , size_t(300) );
-  }
-
-  //------------------------------------------------------------------------
-
-  {
-    const int rank = 6 ;
-    const int order[] = { 5 , 3 , 1 , 0 , 2 , 4 };
-    const unsigned dim[] = { 2 , 3 , 5 , 7 , 11 , 13 };
-    Kokkos::LayoutStride stride_6 = Kokkos::LayoutStride::order_dimensions( rank , order , dim );
-    size_t n = 1 ;
-    for ( int i = 0 ; i < rank ; ++i ) {
-      ASSERT_EQ( size_t(dim[i]) , size_t( stride_6.dimension[i] ) );
-      ASSERT_EQ( size_t(n) , size_t( stride_6.stride[ order[i] ] ) );
-      n *= dim[order[i]] ;
-    }
-  }
-
-  //------------------------------------------------------------------------
-}
-
-} /* namespace Test */
-
-#endif
-
-/*--------------------------------------------------------------------------*/
-
diff --git a/lib/kokkos/core/unit_test/TestViewMapping.hpp b/lib/kokkos/core/unit_test/TestViewMapping.hpp
index eddb81bed5cfaa855dc51a43d4a560bc69030543..8989ee74c8b45f2375567ab6f22abe9a943ec79c 100644
--- a/lib/kokkos/core/unit_test/TestViewMapping.hpp
+++ b/lib/kokkos/core/unit_test/TestViewMapping.hpp
@@ -99,47 +99,67 @@ void test_view_mapping()
   ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) );
   ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) );
 
-  ASSERT_EQ( int(dim_0::rank) , int(0) );
-  ASSERT_EQ( int(dim_0::rank_dynamic) , int(0) );
-
-  ASSERT_EQ( int(dim_s2::rank) , int(1) );
-  ASSERT_EQ( int(dim_s2::rank_dynamic) , int(0) );
-
-  ASSERT_EQ( int(dim_s2_s3::rank) , int(2) );
-  ASSERT_EQ( int(dim_s2_s3::rank_dynamic) , int(0) );
-
-  ASSERT_EQ( int(dim_s2_s3_s4::rank) , int(3) );
-  ASSERT_EQ( int(dim_s2_s3_s4::rank_dynamic) , int(0) );
-
-  ASSERT_EQ( int(dim_s0::rank) , int(1) );
-  ASSERT_EQ( int(dim_s0::rank_dynamic) , int(1) );
-
-  ASSERT_EQ( int(dim_s0_s3::rank) , int(2) );
-  ASSERT_EQ( int(dim_s0_s3::rank_dynamic) , int(1) );
-
-  ASSERT_EQ( int(dim_s0_s3_s4::rank) , int(3) );
-  ASSERT_EQ( int(dim_s0_s3_s4::rank_dynamic) , int(1) );
-
-  ASSERT_EQ( int(dim_s0_s0_s4::rank) , int(3) );
-  ASSERT_EQ( int(dim_s0_s0_s4::rank_dynamic) , int(2) );
-
-  ASSERT_EQ( int(dim_s0_s0_s0::rank) , int(3) );
-  ASSERT_EQ( int(dim_s0_s0_s0::rank_dynamic) , int(3) );
-
-  ASSERT_EQ( int(dim_s0_s0_s0_s0::rank) , int(4) );
-  ASSERT_EQ( int(dim_s0_s0_s0_s0::rank_dynamic) , int(4) );
-
-  ASSERT_EQ( int(dim_s0_s0_s0_s0_s0::rank) , int(5) );
-  ASSERT_EQ( int(dim_s0_s0_s0_s0_s0::rank_dynamic) , int(5) );
-
-  ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0::rank) , int(6) );
-  ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(6) );
-
-  ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0::rank) , int(7) );
-  ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(7) );
-
-  ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank) , int(8) );
-  ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(8) );
+  static_assert( int(dim_0::rank) == int(0) , "" );
+  static_assert( int(dim_0::rank_dynamic) == int(0) , "" );
+  static_assert( int(dim_0::ArgN0) == 1 , "" );
+  static_assert( int(dim_0::ArgN1) == 1 , "" );
+  static_assert( int(dim_0::ArgN2) == 1 , "" );
+
+  static_assert( int(dim_s2::rank) == int(1) , "" );
+  static_assert( int(dim_s2::rank_dynamic) == int(0) , "" );
+  static_assert( int(dim_s2::ArgN0) == 2 , "" );
+  static_assert( int(dim_s2::ArgN1) == 1 , "" );
+
+  static_assert( int(dim_s2_s3::rank) == int(2) , "" );
+  static_assert( int(dim_s2_s3::rank_dynamic) == int(0) , "" );
+  static_assert( int(dim_s2_s3::ArgN0) == 2 , "" );
+  static_assert( int(dim_s2_s3::ArgN1) == 3 , "" );
+  static_assert( int(dim_s2_s3::ArgN2) == 1 , "" );
+
+  static_assert( int(dim_s2_s3_s4::rank) == int(3) , "" );
+  static_assert( int(dim_s2_s3_s4::rank_dynamic) == int(0) , "" );
+  static_assert( int(dim_s2_s3_s4::ArgN0) == 2 , "" );
+  static_assert( int(dim_s2_s3_s4::ArgN1) == 3 , "" );
+  static_assert( int(dim_s2_s3_s4::ArgN2) == 4 , "" );
+  static_assert( int(dim_s2_s3_s4::ArgN3) == 1 , "" );
+
+  static_assert( int(dim_s0::rank) == int(1) , "" );
+  static_assert( int(dim_s0::rank_dynamic) == int(1) , "" );
+
+  static_assert( int(dim_s0_s3::rank) == int(2) , "" );
+  static_assert( int(dim_s0_s3::rank_dynamic) == int(1) , "" );
+  static_assert( int(dim_s0_s3::ArgN0) == 0 , "" );
+  static_assert( int(dim_s0_s3::ArgN1) == 3 , "" );
+
+  static_assert( int(dim_s0_s3_s4::rank) == int(3) , "" );
+  static_assert( int(dim_s0_s3_s4::rank_dynamic) == int(1) , "" );
+  static_assert( int(dim_s0_s3_s4::ArgN0) == 0 , "" );
+  static_assert( int(dim_s0_s3_s4::ArgN1) == 3 , "" );
+  static_assert( int(dim_s0_s3_s4::ArgN2) == 4 , "" );
+
+  static_assert( int(dim_s0_s0_s4::rank) == int(3) , "" );
+  static_assert( int(dim_s0_s0_s4::rank_dynamic) == int(2) , "" );
+  static_assert( int(dim_s0_s0_s4::ArgN0) == 0 , "" );
+  static_assert( int(dim_s0_s0_s4::ArgN1) == 0 , "" );
+  static_assert( int(dim_s0_s0_s4::ArgN2) == 4 , "" );
+
+  static_assert( int(dim_s0_s0_s0::rank) == int(3) , "" );
+  static_assert( int(dim_s0_s0_s0::rank_dynamic) == int(3) , "" );
+
+  static_assert( int(dim_s0_s0_s0_s0::rank) == int(4) , "" );
+  static_assert( int(dim_s0_s0_s0_s0::rank_dynamic) == int(4) , "" );
+
+  static_assert( int(dim_s0_s0_s0_s0_s0::rank) == int(5) , "" );
+  static_assert( int(dim_s0_s0_s0_s0_s0::rank_dynamic) == int(5) , "" );
+
+  static_assert( int(dim_s0_s0_s0_s0_s0_s0::rank) == int(6) , "" );
+  static_assert( int(dim_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(6) , "" );
+
+  static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0::rank) == int(7) , "" );
+  static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(7) , "" );
+
+  static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank) == int(8) , "" );
+  static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(8) , "" );
 
   dim_s0          d1( 2, 3, 4, 5, 6, 7, 8, 9 ); 
   dim_s0_s0       d2( 2, 3, 4, 5, 6, 7, 8, 9 );
@@ -534,6 +554,13 @@ void test_view_mapping()
 
     static_assert( a_int_r1::dimension::rank == 1 , "" );
     static_assert( a_int_r1::dimension::rank_dynamic == 1 , "" );
+    static_assert( a_int_r5::dimension::ArgN0 == 0 , "" );
+    static_assert( a_int_r5::dimension::ArgN1 == 0 , "" );
+    static_assert( a_int_r5::dimension::ArgN2 == 4 , "" );
+    static_assert( a_int_r5::dimension::ArgN3 == 5 , "" );
+    static_assert( a_int_r5::dimension::ArgN4 == 6 , "" );
+    static_assert( a_int_r5::dimension::ArgN5 == 1 , "" );
+
     static_assert( std::is_same< typename a_int_r1::dimension , ViewDimension<0> >::value , "" );
     static_assert( std::is_same< typename a_int_r1::non_const_value_type , int >::value , "" );
 
@@ -545,8 +572,14 @@ void test_view_mapping()
     static_assert( a_const_int_r5::dimension::rank == 5 , "" );
     static_assert( a_const_int_r5::dimension::rank_dynamic == 2 , "" );
 
-    static_assert( std::is_same< typename a_const_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" );
+    static_assert( a_const_int_r5::dimension::ArgN0 == 0 , "" );
+    static_assert( a_const_int_r5::dimension::ArgN1 == 0 , "" );
+    static_assert( a_const_int_r5::dimension::ArgN2 == 4 , "" );
+    static_assert( a_const_int_r5::dimension::ArgN3 == 5 , "" );
+    static_assert( a_const_int_r5::dimension::ArgN4 == 6 , "" );
+    static_assert( a_const_int_r5::dimension::ArgN5 == 1 , "" );
 
+    static_assert( std::is_same< typename a_const_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" );
     static_assert( std::is_same< typename a_const_int_r5::non_const_value_type , int >::value , "" );
 
     static_assert( a_int_r5::dimension::rank == 5 , "" );
@@ -616,8 +649,8 @@ void test_view_mapping()
   {
     constexpr int N = 10 ;
 
-    typedef Kokkos::Experimental::View<int*,Space>        T ;
-    typedef Kokkos::Experimental::View<const int*,Space>  C ;
+    typedef Kokkos::View<int*,Space>        T ;
+    typedef Kokkos::View<const int*,Space>  C ;
 
     int data[N] ;
 
@@ -669,7 +702,7 @@ void test_view_mapping()
 
     ASSERT_EQ( vr1.dimension_0() , N );
 
-    if ( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename Space::memory_space , Kokkos::HostSpace >::value ) {
+    if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , typename Space::memory_space >::accessible ) {
       for ( int i = 0 ; i < N ; ++i ) data[i] = i + 1 ;
       for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 );
       for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 );
@@ -688,8 +721,8 @@ void test_view_mapping()
 
   {
     constexpr int N = 10 ;
-    typedef Kokkos::Experimental::View<int*,Space>        T ;
-    typedef Kokkos::Experimental::View<const int*,Space>  C ;
+    typedef Kokkos::View<int*,Space>        T ;
+    typedef Kokkos::View<const int*,Space>  C ;
 
     T vr1("vr1",N);
     C cr1(vr1);
@@ -712,7 +745,7 @@ void test_view_mapping()
  
     ASSERT_EQ( vr1.dimension_0() , N );
 
-    if ( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename Space::memory_space , Kokkos::HostSpace >::value ) {
+    if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , typename Space::memory_space >::accessible ) {
       for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 1 ;
       for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 );
       for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 );
@@ -731,8 +764,8 @@ void test_view_mapping()
   // Testing proper handling of zero-length allocations
   {
     constexpr int N = 0 ;
-    typedef Kokkos::Experimental::View<int*,Space>        T ;
-    typedef Kokkos::Experimental::View<const int*,Space>  C ;
+    typedef Kokkos::View<int*,Space>        T ;
+    typedef Kokkos::View<const int*,Space>  C ;
 
     T vr1("vr1",N);
     C cr1(vr1);
@@ -771,7 +804,7 @@ void test_view_mapping()
   }
 
   {
-    typedef Kokkos::Experimental::ViewTraits<int***,Kokkos::LayoutStride,ExecSpace>  traits_t ;
+    typedef Kokkos::ViewTraits<int***,Kokkos::LayoutStride,ExecSpace>  traits_t ;
     typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0>                         dims_t ;
     typedef Kokkos::Experimental::Impl::ViewOffset< dims_t , Kokkos::LayoutStride >  offset_t ;
 
@@ -802,8 +835,9 @@ void test_view_mapping()
   }
 
   {
-    typedef Kokkos::Experimental::View<int**,Space>  V ;
+    typedef Kokkos::View<int**,Space>  V ;
     typedef typename V::HostMirror  M ;
+    typedef typename Kokkos::View<int**,Space>::array_layout layout_type;
 
     constexpr int N0 = 10 ;
     constexpr int N1 = 11 ;
@@ -825,6 +859,14 @@ void test_view_mapping()
       ASSERT_EQ( b(i0,i1) , c(i0,i1) );
 
     Kokkos::Experimental::resize( b , 5 , 6 );
+
+    for ( int i0 = 0 ; i0 < 5 ; ++i0 )
+    for ( int i1 = 0 ; i1 < 6 ; ++i1 ) {
+      int val = 1 + i0 + i1 * N0;
+      ASSERT_EQ( b(i0,i1) , c(i0,i1) );
+      ASSERT_EQ( b(i0,i1) , val );
+    }
+
     Kokkos::Experimental::realloc( c , 5 , 6 );
     Kokkos::Experimental::realloc( d , 5 , 6 );
 
@@ -834,11 +876,89 @@ void test_view_mapping()
     ASSERT_EQ( c.dimension_1() , 6 );
     ASSERT_EQ( d.dimension_0() , 5 );
     ASSERT_EQ( d.dimension_1() , 6 );
+
+    layout_type layout(7,8);
+    Kokkos::Experimental::resize( b , layout );
+    for ( int i0 = 0 ; i0 < 7 ; ++i0 )
+    for ( int i1 = 6 ; i1 < 8 ; ++i1 )
+      b(i0,i1) = 1 + i0 + i1 * N0 ;
+
+    for ( int i0 = 5 ; i0 < 7 ; ++i0 )
+    for ( int i1 = 0 ; i1 < 8 ; ++i1 )
+      b(i0,i1) = 1 + i0 + i1 * N0 ;
+
+    for ( int i0 = 0 ; i0 < 7 ; ++i0 )
+    for ( int i1 = 0 ; i1 < 8 ; ++i1 ) {
+       int val = 1 + i0 + i1 * N0;
+       ASSERT_EQ( b(i0,i1) , val );
+    }
+
+    Kokkos::Experimental::realloc( c , layout );
+    Kokkos::Experimental::realloc( d , layout );
+
+    ASSERT_EQ( b.dimension_0() , 7 );
+    ASSERT_EQ( b.dimension_1() , 8 );
+    ASSERT_EQ( c.dimension_0() , 7 );
+    ASSERT_EQ( c.dimension_1() , 8 );
+    ASSERT_EQ( d.dimension_0() , 7 );
+    ASSERT_EQ( d.dimension_1() , 8 );
+
+  }
+
+  {
+    typedef Kokkos::View<int**,Kokkos::LayoutStride,Space>  V ;
+    typedef typename V::HostMirror  M ;
+    typedef typename Kokkos::View<int**,Kokkos::LayoutStride,Space>::array_layout layout_type;
+
+    constexpr int N0 = 10 ;
+    constexpr int N1 = 11 ;
+
+    const int dimensions[] = {N0,N1};
+    const int order[] = {1,0};
+
+    V a("a",Kokkos::LayoutStride::order_dimensions(2,order,dimensions));
+    M b = Kokkos::Experimental::create_mirror(a);
+    M c = Kokkos::Experimental::create_mirror_view(a);
+    M d ;
+
+    for ( int i0 = 0 ; i0 < N0 ; ++i0 )
+    for ( int i1 = 0 ; i1 < N1 ; ++i1 )
+      b(i0,i1) = 1 + i0 + i1 * N0 ;
+
+    Kokkos::Experimental::deep_copy( a , b );
+    Kokkos::Experimental::deep_copy( c , a );
+
+    for ( int i0 = 0 ; i0 < N0 ; ++i0 )
+    for ( int i1 = 0 ; i1 < N1 ; ++i1 )
+      ASSERT_EQ( b(i0,i1) , c(i0,i1) );
+
+    const int dimensions2[] = {7,8};
+    const int order2[] = {1,0};
+    layout_type layout = layout_type::order_dimensions(2,order2,dimensions2);
+    Kokkos::Experimental::resize( b , layout );
+
+    for ( int i0 = 0 ; i0 < 7 ; ++i0 )
+    for ( int i1 = 0 ; i1 < 8 ; ++i1 ) {
+       int val = 1 + i0 + i1 * N0;
+       ASSERT_EQ( b(i0,i1) , c(i0,i1) );
+       ASSERT_EQ( b(i0,i1) , val );
+    }
+
+    Kokkos::Experimental::realloc( c , layout );
+    Kokkos::Experimental::realloc( d , layout );
+
+    ASSERT_EQ( b.dimension_0() , 7 );
+    ASSERT_EQ( b.dimension_1() , 8 );
+    ASSERT_EQ( c.dimension_0() , 7 );
+    ASSERT_EQ( c.dimension_1() , 8 );
+    ASSERT_EQ( d.dimension_0() , 7 );
+    ASSERT_EQ( d.dimension_1() , 8 );
+
   }
 
   {
-    typedef Kokkos::Experimental::View<int*,Space> V ;
-    typedef Kokkos::Experimental::View<int*,Space,Kokkos::MemoryUnmanaged> U ;
+    typedef Kokkos::View<int*,Space> V ;
+    typedef Kokkos::View<int*,Space,Kokkos::MemoryUnmanaged> U ;
 
 
     V a("a",10);
@@ -873,10 +993,10 @@ void test_view_mapping()
     ASSERT_EQ( a.use_count() , 1 );
     ASSERT_EQ( b.use_count() , 0 );
 
-#if KOKKOS_USING_EXP_VIEW && ! defined ( KOKKOS_CUDA_USE_LAMBDA )
+#if ! defined ( KOKKOS_CUDA_USE_LAMBDA )
     /* Cannot launch host lambda when CUDA lambda is enabled */
 
-    typedef typename Kokkos::Impl::is_space< Space >::host_execution_space
+    typedef typename Kokkos::Impl::HostMirror< Space >::Space::execution_space
       host_exec_space ;
 
     Kokkos::parallel_for(
@@ -904,31 +1024,31 @@ struct TestViewMappingSubview
   typedef Kokkos::pair<int,int> range ;
 
   enum { AN = 10 };
-  typedef Kokkos::Experimental::View<int*,ExecSpace>  AT ;
-  typedef Kokkos::Experimental::View<const int*,ExecSpace>  ACT ;
-  typedef Kokkos::Experimental::Subview< AT , range >  AS ;
+  typedef Kokkos::View<int*,ExecSpace>  AT ;
+  typedef Kokkos::View<const int*,ExecSpace>  ACT ;
+  typedef Kokkos::Subview< AT , range >  AS ;
 
   enum { BN0 = 10 , BN1 = 11 , BN2 = 12 };
-  typedef Kokkos::Experimental::View<int***,ExecSpace>  BT ;
-  typedef Kokkos::Experimental::Subview< BT , range , range , range >  BS ;
+  typedef Kokkos::View<int***,ExecSpace>  BT ;
+  typedef Kokkos::Subview< BT , range , range , range >  BS ;
 
   enum { CN0 = 10 , CN1 = 11 , CN2 = 12 };
-  typedef Kokkos::Experimental::View<int***[13][14],ExecSpace>  CT ;
-  typedef Kokkos::Experimental::Subview< CT , range , range , range , int , int >  CS ;
+  typedef Kokkos::View<int***[13][14],ExecSpace>  CT ;
+  typedef Kokkos::Subview< CT , range , range , range , int , int >  CS ;
 
   enum { DN0 = 10 , DN1 = 11 , DN2 = 12 , DN3 = 13 , DN4 = 14 };
-  typedef Kokkos::Experimental::View<int***[DN3][DN4],ExecSpace>  DT ;
-  typedef Kokkos::Experimental::Subview< DT , int , range , range , range , int >  DS ;
+  typedef Kokkos::View<int***[DN3][DN4],ExecSpace>  DT ;
+  typedef Kokkos::Subview< DT , int , range , range , range , int >  DS ;
 
 
-  typedef Kokkos::Experimental::View<int***[13][14],Kokkos::LayoutLeft,ExecSpace>  DLT ;
-  typedef Kokkos::Experimental::Subview< DLT , range , int , int , int , int >  DLS1 ;
+  typedef Kokkos::View<int***[13][14],Kokkos::LayoutLeft,ExecSpace>  DLT ;
+  typedef Kokkos::Subview< DLT , range , int , int , int , int >  DLS1 ;
 
   static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout , Kokkos::LayoutLeft >::value
                , "Subview layout error for rank 1 subview of left-most range of LayoutLeft" );
 
-  typedef Kokkos::Experimental::View<int***[13][14],Kokkos::LayoutRight,ExecSpace>  DRT ;
-  typedef Kokkos::Experimental::Subview< DRT , int , int , int , int , range >  DRS1 ;
+  typedef Kokkos::View<int***[13][14],Kokkos::LayoutRight,ExecSpace>  DRT ;
+  typedef Kokkos::Subview< DRT , int , int , int , int , range >  DRS1 ;
 
   static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout , Kokkos::LayoutRight >::value
                , "Subview layout error for rank 1 subview of right-most range of LayoutRight" );
@@ -1179,23 +1299,23 @@ void test_view_mapping_operator()
 {
   typedef typename Space::execution_space ExecSpace ;
 
-  TestViewMapOperator< Kokkos::Experimental::View<int,Kokkos::LayoutLeft,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int*,Kokkos::LayoutLeft,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int**,Kokkos::LayoutLeft,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int***,Kokkos::LayoutLeft,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int****,Kokkos::LayoutLeft,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int*****,Kokkos::LayoutLeft,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int******,Kokkos::LayoutLeft,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int*******,Kokkos::LayoutLeft,ExecSpace> >::run();
-
-  TestViewMapOperator< Kokkos::Experimental::View<int,Kokkos::LayoutRight,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int*,Kokkos::LayoutRight,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int**,Kokkos::LayoutRight,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int***,Kokkos::LayoutRight,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int****,Kokkos::LayoutRight,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int*****,Kokkos::LayoutRight,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int******,Kokkos::LayoutRight,ExecSpace> >::run();
-  TestViewMapOperator< Kokkos::Experimental::View<int*******,Kokkos::LayoutRight,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int,Kokkos::LayoutLeft,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int*,Kokkos::LayoutLeft,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int**,Kokkos::LayoutLeft,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int***,Kokkos::LayoutLeft,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int****,Kokkos::LayoutLeft,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int*****,Kokkos::LayoutLeft,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int******,Kokkos::LayoutLeft,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int*******,Kokkos::LayoutLeft,ExecSpace> >::run();
+
+  TestViewMapOperator< Kokkos::View<int,Kokkos::LayoutRight,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int*,Kokkos::LayoutRight,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int**,Kokkos::LayoutRight,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int***,Kokkos::LayoutRight,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int****,Kokkos::LayoutRight,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int*****,Kokkos::LayoutRight,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int******,Kokkos::LayoutRight,ExecSpace> >::run();
+  TestViewMapOperator< Kokkos::View<int*******,Kokkos::LayoutRight,ExecSpace> >::run();
 }
 
 /*--------------------------------------------------------------------------*/
@@ -1207,8 +1327,8 @@ struct TestViewMappingAtomic {
 
   typedef Kokkos::MemoryTraits< Kokkos::Atomic >  mem_trait ;
 
-  typedef Kokkos::Experimental::View< int * , ExecSpace > T ;
-  typedef Kokkos::Experimental::View< int * , ExecSpace , mem_trait >  T_atom ;
+  typedef Kokkos::View< int * , ExecSpace > T ;
+  typedef Kokkos::View< int * , ExecSpace , mem_trait >  T_atom ;
 
   T      x ;
   T_atom x_atom ;
diff --git a/lib/kokkos/core/unit_test/TestViewOfClass.hpp b/lib/kokkos/core/unit_test/TestViewOfClass.hpp
index 9b23a5d5597e2260e1a73b9f9b5b6b50a911567e..381b8786bc740dfcfb922eb6ddf5443ffa7136cd 100644
--- a/lib/kokkos/core/unit_test/TestViewOfClass.hpp
+++ b/lib/kokkos/core/unit_test/TestViewOfClass.hpp
@@ -122,42 +122,10 @@ void view_nested_view()
   }
   Kokkos::deep_copy( host_tracking , tracking );
 
-#if KOKKOS_USING_EXP_VIEW
   ASSERT_EQ( 0 , host_tracking(0) );
-#endif
-
 }
 
 }
 
-#if ! KOKKOS_USING_EXP_VIEW
-
-namespace Kokkos {
-namespace Impl {
-
-template< class ExecSpace , class S >
-struct ViewDefaultConstruct< ExecSpace , Test::NestedView<S> , true >
-{
-  typedef Test::NestedView<S> type ;
-  type * const m_ptr ;
-
-  KOKKOS_FORCEINLINE_FUNCTION
-  void operator()( const typename ExecSpace::size_type& i ) const
-    { new(m_ptr+i) type(); }
-
-  ViewDefaultConstruct( type * pointer , size_t capacity )
-    : m_ptr( pointer )
-    {
-      Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
-      parallel_for( range , *this );
-      ExecSpace::fence();
-    }
-};
-
-} // namespace Impl
-} // namespace Kokkos
-
-#endif
-
 /*--------------------------------------------------------------------------*/
 
diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp
index 3846354b8c368f5c8505d84b4931a9105a6a14aa..1c2575b6f61c9fa11b28963852085960ecc420aa 100644
--- a/lib/kokkos/core/unit_test/TestViewSubview.hpp
+++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp
@@ -144,7 +144,7 @@ void test_auto_1d ()
   }
 
   for (size_type j = 0; j < numCols; ++j) {
-    auto X_j = Kokkos::subview (X, Kokkos::ALL(), j);
+    auto X_j = Kokkos::subview (X, Kokkos::ALL, j);
 
     fill_1D<decltype(X_j),Space> f4(X_j, ZERO);
     Kokkos::parallel_for(X_j.dimension_0(),f4);
@@ -154,7 +154,7 @@ void test_auto_1d ()
     }
 
     for (size_type jj = 0; jj < numCols; ++jj) {
-      auto X_jj = Kokkos::subview (X, Kokkos::ALL(), jj);
+      auto X_jj = Kokkos::subview (X, Kokkos::ALL, jj);
       fill_1D<decltype(X_jj),Space> f5(X_jj, ONE);
       Kokkos::parallel_for(X_jj.dimension_0(),f5);
       Kokkos::deep_copy (X_h, X);
@@ -172,9 +172,9 @@ void test_1d_strided_assignment_impl(bool a, bool b, bool c, bool d, int n, int
   int col = n>2?2:0;
   int row = m>2?2:0;
 
-  if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
+  if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) {
   if(a) {
-    Kokkos::View<double*,LD,Space> l1da = Kokkos::subview(l2d,Kokkos::ALL(),row);
+    Kokkos::View<double*,LD,Space> l1da = Kokkos::subview(l2d,Kokkos::ALL,row);
     ASSERT_TRUE( & l1da(0) == & l2d(0,row) );
     if(n>1)
       ASSERT_TRUE( & l1da(1) == & l2d(1,row) );
@@ -185,7 +185,7 @@ void test_1d_strided_assignment_impl(bool a, bool b, bool c, bool d, int n, int
     ASSERT_TRUE( & l1db(1) == & l2d(3,row) );
   }
   if(c) {
-    Kokkos::View<double*,LD,Space> l1dc = Kokkos::subview(l2d,col,Kokkos::ALL());
+    Kokkos::View<double*,LD,Space> l1dc = Kokkos::subview(l2d,col,Kokkos::ALL);
     ASSERT_TRUE( & l1dc(0) == & l2d(col,0) );
     if(m>1)
       ASSERT_TRUE( & l1dc(1) == & l2d(col,1) );
@@ -226,7 +226,7 @@ void test_left_0()
   typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutLeft , Space >
     view_static_8_type ;
 
-  if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
+  if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) {
 
   view_static_8_type  x_static_8("x_static_left_8");
 
@@ -290,7 +290,7 @@ void test_left_1()
   typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutLeft , Space >
     view_type ;
 
-  if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
+  if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) {
 
   view_type  x8("x_left_8",2,3,4,5);
 
@@ -353,7 +353,7 @@ void test_left_2()
 {
   typedef Kokkos::View< int **** , Kokkos::LayoutLeft , Space > view_type ;
 
-  if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
+  if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) {
 
   view_type  x4("x4",2,3,4,5);
 
@@ -417,7 +417,7 @@ void test_left_3()
 {
   typedef Kokkos::View< int ** , Kokkos::LayoutLeft , Space > view_type ;
 
-  if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
+  if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) {
 
   view_type  xm("x4",10,5);
 
@@ -429,7 +429,7 @@ void test_left_3()
   ASSERT_TRUE( & x0() == & xm(5,3) );
 
   Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 =
-    Kokkos::subview( xm, Kokkos::ALL(), 3 );
+    Kokkos::subview( xm, Kokkos::ALL, 3 );
 
   ASSERT_TRUE( x1.is_contiguous() );
   for ( int i = 0 ; i < int(xm.dimension_0()) ; ++i ) {
@@ -437,7 +437,7 @@ void test_left_3()
   }
 
   Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 =
-    Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL() );
+    Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL );
 
   ASSERT_TRUE( ! x2.is_contiguous() );
   for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j )
@@ -446,7 +446,7 @@ void test_left_3()
   }
 
   Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2c =
-    Kokkos::subview( xm, Kokkos::ALL(), std::pair<int,int>(2,4) );
+    Kokkos::subview( xm, Kokkos::ALL, std::pair<int,int>(2,4) );
 
   ASSERT_TRUE( x2c.is_contiguous() );
   for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j )
@@ -455,13 +455,13 @@ void test_left_3()
   }
 
   Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n1 =
-    Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL() );
+    Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL );
 
   ASSERT_TRUE( x2_n1.dimension_0() == 0 );
   ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() );
 
   Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n2 =
-    Kokkos::subview( xm , Kokkos::ALL() , std::pair<int,int>(1,1) );
+    Kokkos::subview( xm , Kokkos::ALL , std::pair<int,int>(1,1) );
 
   ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() );
   ASSERT_TRUE( x2_n2.dimension_1() == 0 );
@@ -477,7 +477,7 @@ void test_right_0()
   typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutRight , Space >
     view_static_8_type ;
 
-  if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
+  if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) {
 
   view_static_8_type  x_static_8("x_static_right_8");
 
@@ -542,7 +542,7 @@ void test_right_1()
   typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutRight , Space >
     view_type ;
 
-  if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
+  if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) {
 
   view_type  x8("x_right_8",2,3,4,5);
 
@@ -597,7 +597,7 @@ void test_right_3()
 {
   typedef Kokkos::View< int ** , Kokkos::LayoutRight , Space > view_type ;
 
-  if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
+  if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) {
 
   view_type  xm("x4",10,5);
 
@@ -609,7 +609,7 @@ void test_right_3()
   ASSERT_TRUE( & x0() == & xm(5,3) );
 
   Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 =
-    Kokkos::subview( xm, 3, Kokkos::ALL() );
+    Kokkos::subview( xm, 3, Kokkos::ALL );
 
   ASSERT_TRUE( x1.is_contiguous() );
   for ( int i = 0 ; i < int(xm.dimension_1()) ; ++i ) {
@@ -617,7 +617,7 @@ void test_right_3()
   }
 
   Kokkos::View<int**,Kokkos::LayoutRight,Space> x2c =
-    Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL() );
+    Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL );
 
   ASSERT_TRUE( x2c.is_contiguous() );
   for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j )
@@ -626,7 +626,7 @@ void test_right_3()
   }
 
   Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 =
-    Kokkos::subview( xm, Kokkos::ALL(), std::pair<int,int>(2,4) );
+    Kokkos::subview( xm, Kokkos::ALL, std::pair<int,int>(2,4) );
 
   ASSERT_TRUE( ! x2.is_contiguous() );
   for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j )
@@ -635,13 +635,13 @@ void test_right_3()
   }
 
   Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n1 =
-    Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL() );
+    Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL );
 
   ASSERT_TRUE( x2_n1.dimension_0() == 0 );
   ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() );
 
   Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n2 =
-    Kokkos::subview( xm , Kokkos::ALL() , std::pair<int,int>(1,1) );
+    Kokkos::subview( xm , Kokkos::ALL , std::pair<int,int>(1,1) );
 
   ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() );
   ASSERT_TRUE( x2_n2.dimension_1() == 0 );
@@ -711,20 +711,21 @@ void test_Check3D5D(SubView a, View b, int i0, int i1, std::pair<int,int> range2
   ASSERT_TRUE( errors == 0 );
 }
 
-template<class Space, class LayoutSub, class Layout, class LayoutOrg>
+template<class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits>
 void test_1d_assign_impl() {
 
   { //Breaks
-    Kokkos::View<int*,LayoutOrg,Space> a("A",N0);
+    Kokkos::View<int*,LayoutOrg,Space> a_org("A",N0);
+    Kokkos::View<int*,LayoutOrg,Space,MemTraits> a(a_org);
     Kokkos::fence();
     for(int i=0; i<N0; i++)
-      a(i) = i;
+      a_org(i) = i;
 
-    Kokkos::View<int[N0],Layout,Space> a1(a);
+    Kokkos::View<int[N0],Layout,Space,MemTraits> a1(a);
     Kokkos::fence();
     test_Check1D(a1,a,std::pair<int,int>(0,N0));
 
-    Kokkos::View<int[N0],LayoutSub,Space> a2(a1);
+    Kokkos::View<int[N0],LayoutSub,Space,MemTraits> a2(a1);
     Kokkos::fence();
     test_Check1D(a2,a,std::pair<int,int>(0,N0));
     a1 = a;
@@ -738,8 +739,8 @@ void test_1d_assign_impl() {
   }
 
   { // Works
-    Kokkos::View<int[N0],LayoutOrg,Space> a("A");
-    Kokkos::View<int*,Layout,Space> a1(a);
+    Kokkos::View<int[N0],LayoutOrg,Space,MemTraits> a("A");
+    Kokkos::View<int*,Layout,Space,MemTraits> a1(a);
     Kokkos::fence();
     test_Check1D(a1,a,std::pair<int,int>(0,N0));
     a1 = a;
@@ -748,127 +749,491 @@ void test_1d_assign_impl() {
   }
 }
 
-template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg>
+template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg,class MemTraits> // Checks a 2D subview (first index fixed to 3) of a 3D view, built once by assignment and once by constructor.
 void test_2d_subview_3d_impl_type() {
   Kokkos::View<int***,LayoutOrg,Space> a_org("A",N0,N1,N2);
-  Kokkos::View<Type,Layout,Space> a(a_org);
+  Kokkos::View<Type,Layout,Space,MemTraits> a(a_org); // view under test, constructed from a_org with the requested data type, layout and memory traits
   for(int i0=0; i0<N0; i0++)
     for(int i1=0; i1<N1; i1++)
       for(int i2=0; i2<N2; i2++)
-        a(i0,i1,i2) = i0*1000000+i1*1000+i2;
-  Kokkos::View<TypeSub,LayoutSub,Space> a1;
-  a1 = Kokkos::subview(a,3,Kokkos::ALL(),Kokkos::ALL());
+        a_org(i0,i1,i2) = i0*1000000+i1*1000+i2; // filled through a_org because Type is also instantiated with const int; the value encodes the index triple
+  Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a1;
+  a1 = Kokkos::subview(a,3,Kokkos::ALL,Kokkos::ALL); // path 1: default-construct, then assign the subview
   Kokkos::fence();
   test_Check2D3D(a1,a,3,std::pair<int,int>(0,N1),std::pair<int,int>(0,N2));
 
-  Kokkos::View<TypeSub,LayoutSub,Space> a2(a,3,Kokkos::ALL(),Kokkos::ALL());
+  Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a2(a,3,Kokkos::ALL,Kokkos::ALL); // path 2: subview taken directly by the View constructor
   Kokkos::fence();
   test_Check2D3D(a2,a,3,std::pair<int,int>(0,N1),std::pair<int,int>(0,N2));
 }
 
-template<class Space, class LayoutSub, class Layout, class LayoutOrg>
+template<class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits> // Runs test_2d_subview_3d_impl_type for every listed static/dynamic extent pairing of source and subview type.
 void test_2d_subview_3d_impl_layout() {
-  test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int*   [N2],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int**      ,LayoutSub, Layout, LayoutOrg>();
+  test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int*   [N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int**      ,LayoutSub, Layout, LayoutOrg, MemTraits>();
 
-  test_2d_subview_3d_impl_type<Space,int*   [N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_3d_impl_type<Space,int*   [N1][N2],int*   [N2],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_3d_impl_type<Space,int*   [N1][N2],int**      ,LayoutSub, Layout, LayoutOrg>();
+  test_2d_subview_3d_impl_type<Space,int*   [N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,int*   [N1][N2],int*   [N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,int*   [N1][N2],int**      ,LayoutSub, Layout, LayoutOrg, MemTraits>();
 
-  test_2d_subview_3d_impl_type<Space,int**      [N2],int[N1][N2],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_3d_impl_type<Space,int**      [N2],int*   [N2],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_3d_impl_type<Space,int**      [N2],int**      ,LayoutSub, Layout, LayoutOrg>();
+  test_2d_subview_3d_impl_type<Space,int**      [N2],int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,int**      [N2],int*   [N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,int**      [N2],int**      ,LayoutSub, Layout, LayoutOrg, MemTraits>();
 
-  test_2d_subview_3d_impl_type<Space,int***         ,int[N1][N2],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_3d_impl_type<Space,int***         ,int*   [N2],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_3d_impl_type<Space,int***         ,int**      ,LayoutSub, Layout, LayoutOrg>();
+  test_2d_subview_3d_impl_type<Space,int***         ,int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,int***         ,int*   [N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,int***         ,int**      ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+  // Same extent pairings again with a const value type on both the source view and the subview.
+  test_2d_subview_3d_impl_type<Space,const int[N0][N1][N2],const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,const int[N0][N1][N2],const int*   [N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,const int[N0][N1][N2],const int**      ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_2d_subview_3d_impl_type<Space,const int*   [N1][N2],const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,const int*   [N1][N2],const int*   [N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,const int*   [N1][N2],const int**      ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_2d_subview_3d_impl_type<Space,const int**      [N2],const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,const int**      [N2],const int*   [N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,const int**      [N2],const int**      ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_2d_subview_3d_impl_type<Space,const int***         ,const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,const int***         ,const int*   [N2],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_2d_subview_3d_impl_type<Space,const int***         ,const int**      ,LayoutSub, Layout, LayoutOrg, MemTraits>();
 }
 
-template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg>
-void test_2d_subview_5d_impl_type() {
+template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg, class MemTraits> // Checks a 3D subview (first two indices fixed to 3 and 5) of a 5D view, built once by assignment and once by constructor.
+void test_3d_subview_5d_impl_type() {
   Kokkos::View<int*****,LayoutOrg,Space> a_org("A",N0,N1,N2,N3,N4);
-  Kokkos::View<Type,Layout,Space> a(a_org);
+  Kokkos::View<Type,Layout,Space,MemTraits> a(a_org); // view under test, constructed from a_org with the requested data type, layout and memory traits
   for(int i0=0; i0<N0; i0++)
     for(int i1=0; i1<N1; i1++)
       for(int i2=0; i2<N2; i2++)
         for(int i3=0; i3<N3; i3++)
           for(int i4=0; i4<N4; i4++)
-            a(i0,i1,i2,i3,i4) = i0*1000000+i1*10000+i2*100+i3*10+i4;
-  Kokkos::View<TypeSub,LayoutSub,Space> a1;
-  a1 = Kokkos::subview(a,3,5,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL());
+            a_org(i0,i1,i2,i3,i4) = i0*1000000+i1*10000+i2*100+i3*10+i4; // filled through a_org because Type is also instantiated with const int
+  Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a1;
+  a1 = Kokkos::subview(a,3,5,Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); // path 1: default-construct, then assign the subview
   Kokkos::fence();
   test_Check3D5D(a1,a,3,5,std::pair<int,int>(0,N2),std::pair<int,int>(0,N3),std::pair<int,int>(0,N4));
 
-  Kokkos::View<TypeSub,LayoutSub,Space> a2(a,3,5,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL());
+  Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a2(a,3,5,Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); // path 2: subview taken directly by the View constructor
   Kokkos::fence();
   test_Check3D5D(a2,a,3,5,std::pair<int,int>(0,N2),std::pair<int,int>(0,N3),std::pair<int,int>(0,N4));
 }
 
-template<class Space, class LayoutSub, class Layout, class LayoutOrg>
-void test_2d_subview_5d_impl_layout() {
-  test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int**      [N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int***         ,LayoutSub, Layout, LayoutOrg>();
-
-  test_2d_subview_5d_impl_type<Space, int*   [N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int*   [N1][N2][N3][N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int*   [N1][N2][N3][N4],int**      [N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int*   [N1][N2][N3][N4],int***         ,LayoutSub, Layout, LayoutOrg>();
-
-  test_2d_subview_5d_impl_type<Space, int**      [N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int**      [N2][N3][N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int**      [N2][N3][N4],int**      [N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int**      [N2][N3][N4],int***         ,LayoutSub, Layout, LayoutOrg>();
-
-  test_2d_subview_5d_impl_type<Space, int***         [N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int***         [N3][N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int***         [N3][N4],int**      [N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int***         [N3][N4],int***         ,LayoutSub, Layout, LayoutOrg>();
-
-  test_2d_subview_5d_impl_type<Space, int****            [N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int****            [N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int****            [N4],int**      [N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int****            [N4],int***         ,LayoutSub, Layout, LayoutOrg>();
-
-  test_2d_subview_5d_impl_type<Space, int*****               ,int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int*****               ,int*   [N3][N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int*****               ,int**      [N4],LayoutSub, Layout, LayoutOrg>();
-  test_2d_subview_5d_impl_type<Space, int*****               ,int***         ,LayoutSub, Layout, LayoutOrg>();
+template<class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits> // Runs test_3d_subview_5d_impl_type for every listed static/dynamic extent pairing of source and subview type.
+void test_3d_subview_5d_impl_layout() {
+  test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, int*   [N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int*   [N1][N2][N3][N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int*   [N1][N2][N3][N4],int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int*   [N1][N2][N3][N4],int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, int**      [N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int**      [N2][N3][N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int**      [N2][N3][N4],int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int**      [N2][N3][N4],int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, int***         [N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int***         [N3][N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int***         [N3][N4],int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int***         [N3][N4],int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, int****            [N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int****            [N4],int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int****            [N4],int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int****            [N4],int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, int*****               ,int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int*****               ,int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int*****               ,int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, int*****               ,int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+  // Same extent pairings again with a const value type on both the source view and the subview.
+  test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, const int*   [N1][N2][N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int*   [N1][N2][N3][N4],const int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int*   [N1][N2][N3][N4],const int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int*   [N1][N2][N3][N4],const int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, const int**      [N2][N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int**      [N2][N3][N4],const int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int**      [N2][N3][N4],const int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int**      [N2][N3][N4],const int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, const int***         [N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int***         [N3][N4],const int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int***         [N3][N4],const int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int***         [N3][N4],const int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, const int****            [N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int****            [N4],const int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int****            [N4],const int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int****            [N4],const int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
+
+  test_3d_subview_5d_impl_type<Space, const int*****               ,const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int*****               ,const int*   [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int*****               ,const int**      [N4],LayoutSub, Layout, LayoutOrg, MemTraits>();
+  test_3d_subview_5d_impl_type<Space, const int*****               ,const int***         ,LayoutSub, Layout, LayoutOrg, MemTraits>();
 }
+
+inline
+void test_subview_legal_args_right() { // Pins the expected SubviewLegalArgsCompileTime<LayoutRight,LayoutRight,...>::value (0 or 1) for each combination of subview argument types (ALL_t, pair<int,int>, int).
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value)); // five-argument cases, int arguments in slots 3 and 4 (0-based)
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value));
+  // int arguments in slots 1 and 4; the other three slots sweep ALL_t / pair.
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); // (pair,int,pair,ALL_t,int): last case of the sweep
+  // int arguments in slots 2 and 4.
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,int>::value)); // (pair,pair,int,ALL_t,int): last case of the sweep
+  // int arguments in slots 0 and 4.
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value));
+  // int arguments in slots 0 and 3.
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value));
+  // int arguments in slots 0 and 1: the only five-argument cases in this function expected to yield 1.
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+  // Three-argument cases (template arguments 3,3): no int arguments.
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
 }
 
-template< class Space >
+inline
+void test_subview_legal_args_left() {
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value));
+
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value));
+
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value));
+
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value));
+
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value));
+
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
+}
+
+}
+
+template< class Space, class MemTraits = void>  // MemTraits (default void) is forwarded as the last template argument of every active Impl call below
 void test_1d_assign() {
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft  ,Kokkos::LayoutLeft  ,Kokkos::LayoutLeft  >();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft  ,Kokkos::LayoutLeft  ,Kokkos::LayoutLeft, MemTraits>();
   //Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutLeft  ,Kokkos::LayoutLeft  >();
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft  ,Kokkos::LayoutLeft  >();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft  ,Kokkos::LayoutLeft, MemTraits>();
   //Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft  ,Kokkos::LayoutRight ,Kokkos::LayoutLeft  >();
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight ,Kokkos::LayoutRight  >();
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutRight ,Kokkos::LayoutRight  >();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight ,Kokkos::LayoutRight, MemTraits>();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutRight ,Kokkos::LayoutRight, MemTraits>();
   //Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft  ,Kokkos::LayoutStride,Kokkos::LayoutLeft  >();
   //Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutStride,Kokkos::LayoutLeft  >();
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft  >();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft, MemTraits>();
 }
 
-template<class Space >
+template<class Space, class MemTraits = void>  // MemTraits (default void) is passed through to each Impl::test_2d_subview_3d_impl_layout instantiation
 void test_2d_subview_3d() {
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight, Kokkos::LayoutRight>();
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight>();
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight>();
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft,  Kokkos::LayoutLeft>();
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight, MemTraits>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft,  Kokkos::LayoutLeft,  MemTraits>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft,  MemTraits>();
 }
 
-template<class Space >
-void test_2d_subview_5d() {
-  Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight>();
-  Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight>();
-  Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft,  Kokkos::LayoutLeft>();
-  Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft>();
+template<class Space, class MemTraits = void>  // runs only the instantiations whose last layout argument is LayoutRight
+void test_3d_subview_5d_right() {
+  Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits>();
+  Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight, MemTraits>();
+}
+
+template<class Space, class MemTraits = void>  // runs only the instantiations whose last layout argument is LayoutLeft
+void test_3d_subview_5d_left() {
+  Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft,  Kokkos::LayoutLeft,  MemTraits>();
+  Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft,  MemTraits>();
 }
 
+
+
+namespace Impl {
+
+  template<class Layout, class Space>  // Layout selects how the flat index ii is decoded into (i,j,k)
+  struct FillView_3D {                 // functor: stores 1000000*i + 1000*j + k into a(i,j,k)
+    Kokkos::View<int***,Layout,Space> a;  // view to fill; the caller assigns it before launching
+    // LayoutLeft: index 0 varies fastest in ii; any other Layout: index 2 varies fastest.
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const int& ii) const {
+      const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        ii % a.dimension_0(): ii / (a.dimension_1()*a.dimension_2());
+      const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        (ii / a.dimension_0()) % a.dimension_1() : (ii / a.dimension_2()) % a.dimension_1();
+      const int k = std::is_same<Layout,Kokkos::LayoutLeft>::value ?  // same layout test as i and j, so (i,j,k) enumerates every element exactly once
+        ii / (a.dimension_0() * a.dimension_1()) : ii % a.dimension_2();
+      a(i,j,k) = 1000000 * i + 1000 * j + k;
+    }
+  };
+
+  template<class Layout, class Space>  // Layout selects how the flat index ii is decoded into (i,j,k,l)
+  struct FillView_4D {                 // functor: stores 1000000*i + 10000*j + 100*k + l into a(i,j,k,l)
+    Kokkos::View<int****,Layout,Space> a;  // view to fill; the caller assigns it before launching
+    // LayoutLeft: index 0 varies fastest in ii; any other Layout: index 3 varies fastest.
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const int& ii) const {
+      const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+         ii % a.dimension_0(): ii / (a.dimension_1()*a.dimension_2()*a.dimension_3());
+      const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        (ii / a.dimension_0()) % a.dimension_1() : (ii / (a.dimension_2()*a.dimension_3()) % a.dimension_1());
+      const int k = std::is_same<Layout,Kokkos::LayoutLeft>::value ?  // same layout test as i and j (the branches were swapped)
+        (ii / (a.dimension_0() * a.dimension_1())) % a.dimension_2() : (ii / a.dimension_3()) % a.dimension_2();
+      const int l = std::is_same<Layout,Kokkos::LayoutLeft>::value ?  // slowest index for LayoutLeft, fastest otherwise
+         ii / (a.dimension_0() * a.dimension_1() * a.dimension_2()) : ii % a.dimension_3();
+      a(i,j,k,l) = 1000000 * i + 10000 * j + 100 * k + l;
+    }
+  };
+
+  template<class Layout, class Space, class MemTraits>  // Layout selects how the flat index ii is decoded into (i,j,k)
+  struct CheckSubviewCorrectness_3D_3D {  // functor: aborts if b(i,j,k) differs from a(i+offset_0, j, k+offset_2)
+    Kokkos::View<const int***,Layout,Space,MemTraits> a;  // view that b is compared against
+    Kokkos::View<const int***,Layout,Space,MemTraits> b;  // view under test; its extents drive the index decode
+    int offset_0,offset_2;  // added to b's first / third index to address the matching element of a
+    // LayoutLeft: index 0 varies fastest in ii; any other Layout: index 2 varies fastest.
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const int& ii) const {
+      const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        ii % b.dimension_0(): ii / (b.dimension_1()*b.dimension_2());
+      const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        (ii / b.dimension_0()) % b.dimension_1() : (ii / b.dimension_2()) % b.dimension_1();
+      const int k = std::is_same<Layout,Kokkos::LayoutLeft>::value ?  // same layout test as i and j, so every element of b is visited
+        ii / (b.dimension_0() * b.dimension_1()) : ii % b.dimension_2();
+      if( a(i+offset_0,j,k+offset_2) != b(i,j,k))
+        Kokkos::abort("Error: check_subview_correctness 3D-3D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)");
+    }
+  };
+
+  template<class Layout, class Space, class MemTraits>  // Layout selects both the index decode and which index of a is held fixed
+  struct CheckSubviewCorrectness_3D_4D {  // functor: aborts if the 3D view b differs from the matching slice of the 4D view a
+    Kokkos::View<const int****,Layout,Space,MemTraits> a;  // 4D view that b is compared against
+    Kokkos::View<const int***,Layout,Space,MemTraits> b;   // 3D view under test; its extents drive the index decode
+    int offset_0,offset_2,index;  // offsets for b's first / third index, and the fixed index into a
+    // LayoutLeft: index 0 varies fastest in ii; any other Layout: index 2 varies fastest.
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const int& ii) const {
+      const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        ii % b.dimension_0(): ii / (b.dimension_1()*b.dimension_2());
+      const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        (ii / b.dimension_0()) % b.dimension_1() : (ii / b.dimension_2()) % b.dimension_1();
+      const int k = std::is_same<Layout,Kokkos::LayoutLeft>::value ?  // same layout test as i and j, so every element of b is visited
+        ii / (b.dimension_0() * b.dimension_1()) : ii % b.dimension_2();
+      // Map (i,j,k) of b onto a: LayoutLeft fixes a's last index, any other Layout fixes a's first index.
+      int i0,i1,i2,i3;
+      if(std::is_same<Layout,Kokkos::LayoutLeft>::value) {
+        i0 = i + offset_0;
+        i1 = j;
+        i2 = k + offset_2;
+        i3 = index;
+      } else {
+        i0 = index;
+        i1 = i + offset_0;
+        i2 = j;
+        i3 = k + offset_2;
+      }
+      if( a(i0,i1,i2,i3) != b(i,j,k))
+        Kokkos::abort("Error: check_subview_correctness 3D-4D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)");
+    }
+  };
+}
+
+template<class Space, class MemTraits = void>  // MemTraits is applied only to the views held by the check functors
+void test_layoutleft_to_layoutleft() {
+  Impl::test_subview_legal_args_left();
+  // Each scope below: build a LayoutLeft view a, take a LayoutLeft subview b, fill a, then compare b against a on the device.
+  {  // 3D -> 3D: pair(16,32) on dimension 0, ALL on dimensions 1 and 2
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> a("A",100,4,3);
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::ALL);
+
+    Impl::FillView_3D<Kokkos::LayoutLeft,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)), fill);  // one iteration per element of a
+
+    Impl::CheckSubviewCorrectness_3D_3D<Kokkos::LayoutLeft,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 16;  // first value of the pair used for dimension 0
+    check.offset_2 = 0;   // dimension 2 was taken with ALL
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+  {  // 3D -> 3D: pair(16,32) on dimension 0 and pair(1,3) on dimension 2
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> a("A",100,4,5);
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::pair<int,int>(1,3));
+
+    Impl::FillView_3D<Kokkos::LayoutLeft,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)), fill);
+
+    Impl::CheckSubviewCorrectness_3D_3D<Kokkos::LayoutLeft,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 16;  // first value of the pair used for dimension 0
+    check.offset_2 = 1;   // first value of the pair used for dimension 2
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+  {  // 4D -> 3D: same ranges as above, with the last index of a fixed to 1
+    Kokkos::View<int****,Kokkos::LayoutLeft,Space> a("A",100,4,5,3); 
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::pair<int,int>(1,3),1);
+
+    Impl::FillView_4D<Kokkos::LayoutLeft,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)*a.extent(3)), fill);
+
+    Impl::CheckSubviewCorrectness_3D_4D<Kokkos::LayoutLeft,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 16;
+    check.offset_2 = 1;
+    check.index = 1;  // the integer passed as the last subview argument
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+}
+
+template<class Space, class MemTraits = void>  // MemTraits is applied only to the views held by the check functors
+void test_layoutright_to_layoutright() {
+  Impl::test_subview_legal_args_right();
+  // Each scope below: build a LayoutRight view a, take a LayoutRight subview b, fill a, then compare b against a on the device.
+  {  // 3D -> 3D: pair(16,32) on dimension 0, ALL on dimensions 1 and 2
+    Kokkos::View<int***,Kokkos::LayoutRight,Space> a("A",100,4,3);
+    Kokkos::View<int***,Kokkos::LayoutRight,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::ALL);
+
+    Impl::FillView_3D<Kokkos::LayoutRight,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)), fill);
+
+    Impl::CheckSubviewCorrectness_3D_3D<Kokkos::LayoutRight,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 16;  // first value of the pair used for dimension 0
+    check.offset_2 = 0;   // dimension 2 was taken with ALL
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+  {  // 4D -> 3D: first index of a fixed to 1, pair(1,3) on dimension 1, ALL on dimensions 2 and 3
+    Kokkos::View<int****,Kokkos::LayoutRight,Space> a("A",3,4,5,100);
+    Kokkos::View<int***,Kokkos::LayoutRight,Space> b(a,1,Kokkos::pair<int,int>(1,3),Kokkos::ALL,Kokkos::ALL);
+
+
+    Impl::FillView_4D<Kokkos::LayoutRight,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)*a.extent(3)), fill);
+
+    Impl::CheckSubviewCorrectness_3D_4D<Kokkos::LayoutRight,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 1;  // first value of the pair; the functor applies it to a's second index in the non-LayoutLeft branch
+    check.offset_2 = 0;  // a's last dimension was taken with ALL
+    check.index = 1;     // the integer passed as the first subview argument
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+}
+
+
 }
 //----------------------------------------------------------------------------
 
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda.hpp b/lib/kokkos/core/unit_test/cuda/TestCuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a49d9ef41ed81a9b1c8b49cfe0e338bcd75d2d3e
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda.hpp
@@ -0,0 +1,107 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_TEST_CUDAHPP
+#define KOKKOS_TEST_CUDAHPP
+#include <gtest/gtest.h>
+
+#include <Kokkos_Macros.hpp>
+
+#include <Kokkos_Core.hpp>
+
+#include <TestTile.hpp>
+
+//----------------------------------------------------------------------------
+
+#include <TestSharedAlloc.hpp>
+#include <TestViewMapping.hpp>
+
+
+#include <TestViewAPI.hpp>
+#include <TestViewOfClass.hpp>
+#include <TestViewSubview.hpp>
+#include <TestAtomic.hpp>
+#include <TestAtomicOperations.hpp>
+#include <TestRange.hpp>
+#include <TestTeam.hpp>
+#include <TestReduce.hpp>
+#include <TestScan.hpp>
+#include <TestAggregate.hpp>
+#include <TestCompilerMacros.hpp>
+#include <TestTaskScheduler.hpp>
+#include <TestMemoryPool.hpp>
+
+
+#include <TestCXX11.hpp>
+#include <TestCXX11Deduction.hpp>
+#include <TestTeamVector.hpp>
+#include <TestTemplateMetaFunctions.hpp>
+
+#include <TestPolicyConstruction.hpp>
+
+#include <TestMDRange.hpp>
+
+namespace Test {
+
+// Fixture shared by the cuda unit tests. SetUpTestCase/TearDownTestCase are only defined when TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN is set (see the #ifdef below); per the original author, the definitions can only live in one cpp file.
+class cuda : public ::testing::Test {
+protected:
+  static void SetUpTestCase();
+  static void TearDownTestCase();
+};
+
+#ifdef TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN
+void cuda::SetUpTestCase()
+  {
+    Kokkos::Cuda::print_configuration( std::cout );
+    Kokkos::HostSpace::execution_space::initialize();  // host execution space is initialized before Cuda
+    Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );  // device selected with SelectDevice(0)
+  }
+
+void cuda::TearDownTestCase()
+  {
+    Kokkos::Cuda::finalize();  // finalized in the reverse order of SetUpTestCase
+    Kokkos::HostSpace::execution_space::finalize();
+  }
+#endif // TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN
+}
+#endif
diff --git a/lib/kokkos/core/unit_test/TestCuda_c.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp
similarity index 63%
rename from lib/kokkos/core/unit_test/TestCuda_c.cpp
rename to lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp
index 70584cead1b5efb7b6b0b372aed95dd522c25169..113b72c70f4fb9032577a8d38a3e129fe48d86c1 100644
--- a/lib/kokkos/core/unit_test/TestCuda_c.cpp
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp
@@ -40,61 +40,11 @@
 // ************************************************************************
 //@HEADER
 */
-
-#include <gtest/gtest.h>
-
-#include <iostream>
-
-#include <Kokkos_Core.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp>
-#include <impl/Kokkos_ViewTileLeft.hpp>
-#include <TestTile.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestViewImpl.hpp>
-#include <TestAtomic.hpp>
-#include <TestAtomicOperations.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestMemoryPool.hpp>
-#include <TestTeamVector.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-#include <TestCXX11Deduction.hpp>
-
-#include <TestTaskPolicy.hpp>
-#include <TestPolicyConstruction.hpp>
-
-//----------------------------------------------------------------------------
-
-class cuda : public ::testing::Test {
-protected:
-  static void SetUpTestCase();
-  static void TearDownTestCase();
-};
-
-//----------------------------------------------------------------------------
+#include <cuda/TestCuda.hpp>
 
 namespace Test {
 
-TEST_F( cuda, atomic )
+TEST_F( cuda , atomics )
 {
   const int loop_count = 1e3 ;
 
@@ -133,7 +83,6 @@ TEST_F( cuda, atomic )
   ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,1) ) );
   ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,2) ) );
   ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,3) ) );
-
 }
 
 TEST_F( cuda , atomic_operations )
@@ -151,6 +100,8 @@ TEST_F( cuda , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 2 ) ) );
@@ -161,6 +112,8 @@ TEST_F( cuda , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 2 ) ) );
@@ -171,6 +124,8 @@ TEST_F( cuda , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 2 ) ) );
@@ -181,6 +136,8 @@ TEST_F( cuda , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 2 ) ) );
@@ -191,6 +148,8 @@ TEST_F( cuda , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 2 ) ) );
@@ -205,171 +164,5 @@ TEST_F( cuda , atomic_operations )
 
 }
 
-//----------------------------------------------------------------------------
-
-TEST_F( cuda, tile_layout)
-{
-  TestTile::test< Kokkos::Cuda , 1 , 1 >( 1 , 1 );
-  TestTile::test< Kokkos::Cuda , 1 , 1 >( 2 , 3 );
-  TestTile::test< Kokkos::Cuda , 1 , 1 >( 9 , 10 );
-
-  TestTile::test< Kokkos::Cuda , 2 , 2 >( 1 , 1 );
-  TestTile::test< Kokkos::Cuda , 2 , 2 >( 2 , 3 );
-  TestTile::test< Kokkos::Cuda , 2 , 2 >( 4 , 4 );
-  TestTile::test< Kokkos::Cuda , 2 , 2 >( 9 , 9 );
-
-  TestTile::test< Kokkos::Cuda , 2 , 4 >( 9 , 9 );
-  TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 );
-
-  TestTile::test< Kokkos::Cuda , 4 , 4 >( 1 , 1 );
-  TestTile::test< Kokkos::Cuda , 4 , 4 >( 4 , 4 );
-  TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 );
-  TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 11 );
-
-  TestTile::test< Kokkos::Cuda , 8 , 8 >( 1 , 1 );
-  TestTile::test< Kokkos::Cuda , 8 , 8 >( 4 , 4 );
-  TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 9 );
-  TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 11 );
-}
-
-TEST_F( cuda , view_aggregate )
-{
-  TestViewAggregate< Kokkos::Cuda >();
-  TestViewAggregateReduction< Kokkos::Cuda >();
-}
-
-TEST_F( cuda , scan )
-{
-  TestScan< Kokkos::Cuda >::test_range( 1 , 1000 );
-  TestScan< Kokkos::Cuda >( 1000000 );
-  TestScan< Kokkos::Cuda >( 10000000 );
-
-  TestScan< Kokkos::Cuda >( 0 );
-  TestScan< Kokkos::Cuda >( 0 , 0 );
-
-  Kokkos::Cuda::fence();
-}
-
-TEST_F( cuda , team_scan )
-{
-  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10 );
-  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
-  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10000 );
-  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
-}
-
-TEST_F( cuda , memory_pool )
-{
-//  typedef Kokkos::CudaUVMSpace  device_type;
-  typedef Kokkos::Cuda          device_type;
-
-  bool val = TestMemoryPool::test_mempool< device_type >( 128, 128000000 );
-  ASSERT_TRUE( val );
-
-  Kokkos::Cuda::fence();
-
-  TestMemoryPool::test_mempool2< device_type >( 64, 4, 100000, 200000 );
-
-  Kokkos::Cuda::fence();
-
-  TestMemoryPool::test_memory_exhaustion< Kokkos::Cuda >();
-
-  Kokkos::Cuda::fence();
-}
-
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( cuda , template_meta_functions )
-{
-  TestTemplateMetaFunctions<int, Kokkos::Cuda >();
-}
-
-//----------------------------------------------------------------------------
-
-namespace Test {
-
-TEST_F( cuda , reduction_deduction )
-{
-  TestCXX11::test_reduction_deduction< Kokkos::Cuda >();
-}
-
-TEST_F( cuda , team_vector )
-{
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(0) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(1) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(2) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(3) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(4) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(5) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(6) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(7) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(8) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(9) ) );
-  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(10) ) );
-}
-
-TEST_F( cuda, triple_nested_parallelism )
-{
-  TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 32 );
-  TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 16 );
-  TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 16 , 16 );
-}
-
-}
-
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_ENABLE_TASKPOLICY )
-
-TEST_F( cuda , task_fib )
-{
-  for ( int i = 0 ; i < 25 ; ++i ) {
-    TestTaskPolicy::TestFib< Kokkos::Cuda >::run(i, (i+1)*1000000 );
-  }
-}
-
-TEST_F( cuda , task_depend )
-{
-  for ( int i = 0 ; i < 25 ; ++i ) {
-    TestTaskPolicy::TestTaskDependence< Kokkos::Cuda >::run(i);
-  }
-}
-
-TEST_F( cuda , task_team )
-{
-  //TestTaskPolicy::TestTaskTeam< Kokkos::Cuda >::run(1000);
-  TestTaskPolicy::TestTaskTeam< Kokkos::Cuda >::run(104);
-  TestTaskPolicy::TestTaskTeamValue< Kokkos::Cuda >::run(1000);
-}
-
-//----------------------------------------------------------------------------
-
-TEST_F( cuda , old_task_policy )
-{
-  TestTaskPolicy::test_task_dep< Kokkos::Cuda >( 10 );
-
-  for ( long i = 0 ; i < 15 ; ++i ) {
-      // printf("TestTaskPolicy::test_fib< Kokkos::Cuda >(%d);\n",i);
-    TestTaskPolicy::test_fib< Kokkos::Cuda >(i,4096);
-  }
-  for ( long i = 0 ; i < 35 ; ++i ) {
-      // printf("TestTaskPolicy::test_fib2< Kokkos::Cuda >(%d);\n",i);
-    TestTaskPolicy::test_fib2< Kokkos::Cuda >(i,4096);
-  }
-}
-
-TEST_F( cuda , old_task_team )
-{
-  TestTaskPolicy::test_task_team< Kokkos::Cuda >(1000);
-}
-
-TEST_F( cuda , old_task_latch )
-{
-  TestTaskPolicy::test_latch< Kokkos::Cuda >(10);
-  TestTaskPolicy::test_latch< Kokkos::Cuda >(1000);
-}
-
-#endif // #if defined( KOKKOS_ENABLE_TASKPOLICY )
+} // namespace Test
 
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..80de6618e62f0f439fbfba06c08578b208389997
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp
@@ -0,0 +1,189 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#define TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda , init ) { // Intentionally empty body; presumably only exercises the `cuda` fixture itself.
+  ;
+}
+
+TEST_F( cuda , md_range ) { // Runs the 2-D and 3-D TestMDRange for-tests on Kokkos::Cuda.
+  TestMDRange_2D< Kokkos::Cuda >::test_for2(100,100);
+
+  TestMDRange_3D< Kokkos::Cuda >::test_for3(100,100,100);
+}
+
+TEST_F( cuda, policy_construction) { // Instantiates the RangePolicy and TeamPolicy construction helpers for Kokkos::Cuda.
+  TestRangePolicyConstruction< Kokkos::Cuda >();
+  TestTeamPolicyConstruction< Kokkos::Cuda >();
+}
+
+TEST_F( cuda , range_tag ) // TestRange for/reduce/scan on Cuda: both schedules with argument 0, then Static and Dynamic alternately.
+{
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(0); // argument 0: Static and Dynamic
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(0);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0);
+
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(2); // argument 2: Static only
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(2);
+
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); // argument 3: Dynamic only
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3);
+
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); // argument 1000: Static only
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000);
+
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); // argument 1001: Dynamic only
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001);
+}
+
+
+//----------------------------------------------------------------------------
+
+TEST_F( cuda , compiler_macros ) // Passes only if TestCompilerMacros::Test< Kokkos::Cuda >() returns true.
+{
+  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Cuda >() ) );
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( cuda , memory_pool ) // Runs three TestMemoryPool helpers on Cuda; only the first one's return value is asserted here.
+{
+  bool val = TestMemoryPool::test_mempool< Kokkos::Cuda >( 128, 128000000 );
+  ASSERT_TRUE( val );
+
+  TestMemoryPool::test_mempool2< Kokkos::Cuda >( 64, 4, 1000000, 2000000 ); // result (if any) is not checked at this call site
+
+  TestMemoryPool::test_memory_exhaustion< Kokkos::Cuda >(); // result (if any) is not checked at this call site
+}
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_ENABLE_TASKDAG )
+
+TEST_F( cuda , task_fib ) // Runs TestFib for i = 0..24.
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestFib< Kokkos::Cuda >::run(i, (i+1)*(i+1)*10000 ); // second argument grows quadratically: (i+1)^2 * 10000
+  }
+}
+
+TEST_F( cuda , task_depend ) // Runs TestTaskDependence for i = 0..24.
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestTaskDependence< Kokkos::Cuda >::run(i);
+  }
+}
+
+TEST_F( cuda , task_team ) // Only TestTaskTeam is active; the TestTaskTeamValue call below is commented out.
+{
+  TestTaskScheduler::TestTaskTeam< Kokkos::Cuda >::run(1000);
+  //TestTaskScheduler::TestTaskTeamValue< Kokkos::Cuda >::run(1000); //put back after testing
+}
+
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
+TEST_F( cuda , cxx11 ) // Asserts TestCXX11::Test cases 1-4, but only when Cuda is the default execution space.
+{
+  if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Cuda >::value ) { // otherwise the test passes without running anything
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(1) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(2) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(3) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(4) ) );
+  }
+}
+#endif
+
+TEST_F( cuda, tile_layout ) // Runs TestTile::test on Cuda; template arguments are presumably tile dimensions - TODO confirm.
+{
+  TestTile::test< Kokkos::Cuda , 1 , 1 >( 1 , 1 ); // < 1 , 1 >
+  TestTile::test< Kokkos::Cuda , 1 , 1 >( 2 , 3 );
+  TestTile::test< Kokkos::Cuda , 1 , 1 >( 9 , 10 );
+
+  TestTile::test< Kokkos::Cuda , 2 , 2 >( 1 , 1 ); // < 2 , 2 >
+  TestTile::test< Kokkos::Cuda , 2 , 2 >( 2 , 3 );
+  TestTile::test< Kokkos::Cuda , 2 , 2 >( 4 , 4 );
+  TestTile::test< Kokkos::Cuda , 2 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::Cuda , 2 , 4 >( 9 , 9 ); // the two non-square instantiations
+  TestTile::test< Kokkos::Cuda , 4 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::Cuda , 4 , 4 >( 1 , 1 ); // < 4 , 4 >
+  TestTile::test< Kokkos::Cuda , 4 , 4 >( 4 , 4 );
+  TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 );
+  TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 11 );
+
+  TestTile::test< Kokkos::Cuda , 8 , 8 >( 1 , 1 ); // < 8 , 8 >
+  TestTile::test< Kokkos::Cuda , 8 , 8 >( 4 , 4 );
+  TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 9 );
+  TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 11 );
+}
+
+#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
+#if defined (KOKKOS_COMPILER_CLANG)
+TEST_F( cuda , dispatch ) // Dispatches an empty lambda 100 x 100 times over ranges [0,j).
+{
+  const int repeat = 100 ;
+  for ( int i = 0 ; i < repeat ; ++i ) {
+  for ( int j = 0 ; j < repeat ; ++j ) { // j starts at 0, so the empty range [0,0) is dispatched too
+    Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >(0,j)
+                        , KOKKOS_LAMBDA( int ) {} );
+  }}
+}
+#endif
+#endif
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b9ab9fe72d494a672cefe07f770ea38663e2ffec
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp
@@ -0,0 +1,56 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda , reducers ) // TestReducers on Cuda, choosing the execute_* variant per scalar type.
+{
+  TestReducers<int, Kokkos::Cuda>::execute_integer();
+  TestReducers<size_t, Kokkos::Cuda>::execute_integer();
+  TestReducers<double, Kokkos::Cuda>::execute_float();
+  TestReducers<Kokkos::complex<double>, Kokkos::Cuda>::execute_basic(); // complex uses execute_basic rather than execute_float
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c588d752dd21ef2135d1e4fa52c37f5dba0c37a9
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp
@@ -0,0 +1,130 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, long_reduce) { // TestReduce< long > on Cuda, constructed with 0 and with 1000000.
+  TestReduce< long ,   Kokkos::Cuda >( 0 );
+  TestReduce< long ,   Kokkos::Cuda >( 1000000 );
+}
+
+TEST_F( cuda, double_reduce) { // TestReduce< double > on Cuda, constructed with 0 and with 1000000.
+  TestReduce< double ,   Kokkos::Cuda >( 0 );
+  TestReduce< double ,   Kokkos::Cuda >( 1000000 );
+}
+
+TEST_F( cuda, long_reduce_dynamic ) { // TestReduceDynamic< long > on Cuda, constructed with 0 and with 1000000.
+  TestReduceDynamic< long ,   Kokkos::Cuda >( 0 );
+  TestReduceDynamic< long ,   Kokkos::Cuda >( 1000000 );
+}
+
+TEST_F( cuda, double_reduce_dynamic ) { // TestReduceDynamic< double > on Cuda, constructed with 0 and with 1000000.
+  TestReduceDynamic< double ,   Kokkos::Cuda >( 0 );
+  TestReduceDynamic< double ,   Kokkos::Cuda >( 1000000 );
+}
+
+TEST_F( cuda, long_reduce_dynamic_view ) { // TestReduceDynamicView< long > on Cuda, constructed with 0 and with 1000000.
+  TestReduceDynamicView< long ,   Kokkos::Cuda >( 0 );
+  TestReduceDynamicView< long ,   Kokkos::Cuda >( 1000000 );
+}
+
+TEST_F( cuda , scan ) // One test_range call, then TestScan constructed with 0, 100000 and 10000000.
+{
+  TestScan< Kokkos::Cuda >::test_range( 1 , 1000 );
+  TestScan< Kokkos::Cuda >( 0 );
+  TestScan< Kokkos::Cuda >( 100000 );
+  TestScan< Kokkos::Cuda >( 10000000 );
+  Kokkos::Cuda::fence(); // fence Cuda before the test returns
+}
+
+#if 0
+TEST_F( cuda , scan_small ) // Disabled: this test is compiled out by the surrounding #if 0.
+{
+  typedef TestScan< Kokkos::Cuda , Kokkos::Impl::CudaExecUseScanSmall > TestScanFunctor ;
+  for ( int i = 0 ; i < 1000 ; ++i ) { // 1000 repetitions of the two smaller scans
+    TestScanFunctor( 10 );
+    TestScanFunctor( 10000 );
+  }
+  TestScanFunctor( 1000000 );
+  TestScanFunctor( 10000000 );
+
+  Kokkos::Cuda::fence();
+}
+#endif
+
+TEST_F( cuda  , team_scan ) // TestScanTeam on Cuda with Static and Dynamic schedules, each constructed with 0, 10 and 10000.
+{
+  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10 );
+  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
+  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10000 );
+  TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
+}
+
+TEST_F( cuda , team_long_reduce) { // TestReduceTeam< long > on Cuda: Static and Dynamic schedules, each constructed with 0, 3 and 100000.
+  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< long ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+
+TEST_F( cuda , team_double_reduce) { // TestReduceTeam< double > on Cuda: Static and Dynamic schedules, each constructed with 0, 3 and 100000.
+  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< double ,   Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+
+TEST_F( cuda , reduction_deduction ) // Delegates to TestCXX11::test_reduction_deduction instantiated for Kokkos::Cuda.
+{
+  TestCXX11::test_reduction_deduction< Kokkos::Cuda >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f3cbc3b8897625f07f7c4fc810662b68cfe907e9
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp
@@ -0,0 +1,399 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+__global__ // CUDA kernel that calls Kokkos::abort; NOTE(review): no launch of it appears in this file.
+void test_abort()
+{
+  Kokkos::abort("test_abort");
+}
+
+__global__ // CUDA kernel: if *ptr holds 42 it is overwritten with 2 * 42, otherwise it is left untouched.
+void test_cuda_spaces_int_value( int * ptr )
+{
+  if ( *ptr == 42 ) { *ptr = 2 * 42 ; }
+}
+
+TEST_F( cuda , space_access ) // Compile-time only: every check in this test is a static_assert.
+{
+  // MemorySpaceAccess with Kokkos::HostSpace as the first template argument
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::HostSpace >::assignable , "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace >::assignable , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace >::accessible , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::assignable , "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::accessible , "" );
+
+  // MemorySpaceAccess with Kokkos::CudaSpace as the first template argument
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaSpace >::assignable , "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >::assignable , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace >::assignable , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace >::accessible , "" );
+
+  // MemorySpaceAccess with Kokkos::CudaUVMSpace as the first template argument
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaUVMSpace >::assignable , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace >::assignable , "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace >::accessible , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace >::assignable , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace >::accessible , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" );
+
+  // MemorySpaceAccess with Kokkos::CudaHostPinnedSpace as the first template argument
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace >::assignable , "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace >::accessible , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace >::assignable , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace >::accessible , "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace >::assignable , "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace >::accessible , "" );
+
+  // SpaceAccessibility with Kokkos::Cuda or Kokkos::HostSpace as the first template argument
+
+  static_assert(
+    ! Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::HostSpace >::accessible , "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaSpace >::accessible , "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaUVMSpace >::accessible , "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaHostPinnedSpace >::accessible , "" );
+
+  static_assert(
+    ! Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaSpace >::accessible , "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::accessible , "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" );
+
+  // HostMirror< ... >::Space type checks
+  static_assert(
+    std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaSpace >::Space
+                , Kokkos::HostSpace >::value , "" );
+
+  static_assert(
+    std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaUVMSpace >::Space
+                , Kokkos::Device< Kokkos::HostSpace::execution_space
+                                , Kokkos::CudaUVMSpace > >::value , "" );
+
+  static_assert(
+    std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaHostPinnedSpace >::Space
+                , Kokkos::CudaHostPinnedSpace >::value , "" );
+
+  static_assert( // NOTE(review): both is_same arguments below are the identical type, so this check is trivially true.
+    std::is_same< Kokkos::Device< Kokkos::HostSpace::execution_space
+                                , Kokkos::CudaUVMSpace >
+                , Kokkos::Device< Kokkos::HostSpace::execution_space
+                                , Kokkos::CudaUVMSpace > >::value , "" );
+
+  static_assert( // the remaining checks require each HostMirror space to report HostSpace as accessible
+    Kokkos::Impl::SpaceAccessibility
+      < Kokkos::Impl::HostMirror< Kokkos::Cuda >::Space
+      , Kokkos::HostSpace
+      >::accessible , "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility
+      < Kokkos::Impl::HostMirror< Kokkos::CudaSpace >::Space
+      , Kokkos::HostSpace
+      >::accessible , "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility
+      < Kokkos::Impl::HostMirror< Kokkos::CudaUVMSpace >::Space
+      , Kokkos::HostSpace
+      >::accessible , "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility
+      < Kokkos::Impl::HostMirror< Kokkos::CudaHostPinnedSpace >::Space
+      , Kokkos::HostSpace
+      >::accessible , "" );
+}
+
+TEST_F( cuda, uvm ) // Round-trips one int through CudaUVMSpace: host write, kernel update, host read-back.
+{
+  if ( Kokkos::CudaUVMSpace::available() ) { // when UVM is unavailable the test does nothing and passes
+
+    int * uvm_ptr = static_cast<int*>( Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >("uvm_ptr",sizeof(int)) ); // named cast instead of a C-style cast
+
+    *uvm_ptr = 42 ;
+
+    Kokkos::Cuda::fence();
+    test_cuda_spaces_int_value<<<1,1>>>(uvm_ptr); // single-thread launch; the kernel rewrites 42 as 2 * 42
+    Kokkos::Cuda::fence(); // fence again before the host dereferences uvm_ptr
+
+    EXPECT_EQ( *uvm_ptr, int(2*42) );
+
+    Kokkos::kokkos_free< Kokkos::CudaUVMSpace >(uvm_ptr );
+
+  }
+}
+
+TEST_F( cuda, uvm_num_allocs ) // Fills a view of UVM views and expects the final allocation to throw.
+{
+  // The max number of uvm allocations allowed is 65536
+  #define MAX_NUM_ALLOCS 65536
+
+  if ( Kokkos::CudaUVMSpace::available() ) {
+
+    struct TestMaxUVMAllocs {
+
+      using view_type         = Kokkos::View< double* , Kokkos::CudaUVMSpace >;
+      using view_of_view_type = Kokkos::View< view_type[ MAX_NUM_ALLOCS ] // one slot per allowed UVM allocation
+                                            , Kokkos::CudaUVMSpace >;
+
+      TestMaxUVMAllocs()
+      : view_allocs_test("view_allocs_test")
+      {
+
+        for ( auto i = 0; i < MAX_NUM_ALLOCS ; ++i ) {
+
+          // Kokkos will throw a runtime exception if an attempt is made to
+          // allocate more than the maximum number of uvm allocations
+
+          // In this test, the max num of allocs occurs when i = MAX_NUM_ALLOCS - 1
+          // since the 'outer' view counts as one UVM allocation, leaving
+          // 65535 possible UVM allocations, that is 'i in [0 , 65535)'
+
+          // The test will catch the exception thrown in this case and continue
+
+          if ( i == ( MAX_NUM_ALLOCS - 1) ) {
+            EXPECT_ANY_THROW( { view_allocs_test(i) = view_type("inner_view",1); } ) ;
+          }
+          else {
+            if(i<MAX_NUM_ALLOCS - 1000) { // all but the last 1000 slots are required to allocate without throwing
+              EXPECT_NO_THROW( { view_allocs_test(i) = view_type("inner_view",1); } ) ;
+            } else { // This might or might not throw depending on compilation options.
+              try {
+                view_allocs_test(i) = view_type("inner_view",1);
+              }
+              catch (...) {} // deliberately swallowed: either outcome is accepted for these slots
+            }
+          }
+
+        } //end allocation for loop
+
+        for ( auto i = 0; i < MAX_NUM_ALLOCS -1; ++i ) { // resets slots [0, MAX_NUM_ALLOCS-1); the last slot is the one expected to throw
+
+          view_allocs_test(i) = view_type();
+
+        } //end deallocation for loop
+
+        view_allocs_test = view_of_view_type(); // deallocate the view of views
+      }
+
+      // Member
+      view_of_view_type view_allocs_test ;
+    } ;
+
+    // trigger the test via the TestMaxUVMAllocs constructor
+    TestMaxUVMAllocs() ;
+
+  }
+  #undef MAX_NUM_ALLOCS // macro is removed again at the end of this test
+}
+
+template< class MemSpace , class ExecSpace > // Fills a MemSpace view from MemSpace's execution space, then verifies it from ExecSpace.
+struct TestViewCudaAccessible {
+
+  enum { N = 1000 };
+
+  using V = Kokkos::View<double*,MemSpace> ;
+
+  V m_base ;
+
+  struct TagInit {}; // tag selecting the initialisation operator()
+  struct TagTest {}; // tag selecting the verification operator()
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const TagTest & , const int i , long & error_count ) const
+    { if ( m_base[i] != i + 1 ) ++error_count ; } // counts entries that do not hold the value written by TagInit
+
+  TestViewCudaAccessible()
+    : m_base("base",N)
+    {}
+
+  static void run()
+    {
+      TestViewCudaAccessible self ;
+      Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space , TagInit >(0,N) , self );
+      MemSpace::execution_space::fence();
+      // The fence above matters: the reduce below may run in a different execution space, so the init kernel must be complete.
+      long error_count = -1 ; // starts at -1 so the EXPECT_EQ below fails unless parallel_reduce stores a result
+      Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagTest >(0,N) , self , error_count );
+      EXPECT_EQ( error_count , 0 );
+    }
+};
+
+TEST_F( cuda , impl_view_accessible ) // CudaSpace is verified from Cuda only; the other two spaces from Cuda and from the host.
+{
+  TestViewCudaAccessible< Kokkos::CudaSpace , Kokkos::Cuda >::run();
+
+  TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::Cuda >::run();
+  TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >::run();
+
+  TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::Cuda >::run();
+  TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >::run();
+}
+
+template< class MemSpace > // Writes through an ordinary view and reads back through a const MemoryRandomAccess view of the same data.
+struct TestViewCudaTexture {
+
+  enum { N = 1000 };
+
+  using V = Kokkos::View<double*,MemSpace> ;
+  using T = Kokkos::View<const double*, MemSpace, Kokkos::MemoryRandomAccess > ;
+
+  V m_base ;
+  T m_tex ; // constructed from m_base in the constructor below
+
+  struct TagInit {}; // tag selecting the initialisation operator()
+  struct TagTest {}; // tag selecting the verification operator()
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const TagTest & , const int i , long & error_count ) const
+    { if ( m_tex[i] != i + 1 ) ++error_count ; } // reads via m_tex, not m_base
+
+  TestViewCudaTexture()
+    : m_base("base",N)
+    , m_tex( m_base )
+    {}
+
+  static void run()
+    {
+      EXPECT_TRUE( ( std::is_same< typename V::reference_type
+                                 , double &
+                                 >::value ) );
+
+      EXPECT_TRUE( ( std::is_same< typename T::reference_type
+                                 , const double
+                                 >::value ) );
+
+      EXPECT_TRUE(  V::reference_type_is_lvalue_reference ); // An ordinary view
+      EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value
+
+      TestViewCudaTexture self ;
+      Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda , TagInit >(0,N) , self );
+      long error_count = -1 ; // starts at -1 so the EXPECT_EQ below fails unless parallel_reduce stores a result
+      Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda , TagTest >(0,N) , self , error_count );
+      EXPECT_EQ( error_count , 0 );
+    }
+};
+
+
+TEST_F( cuda , impl_view_texture ) // Runs TestViewCudaTexture for CudaSpace and CudaUVMSpace.
+{
+  TestViewCudaTexture< Kokkos::CudaSpace >::run();
+  TestViewCudaTexture< Kokkos::CudaUVMSpace >::run();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fd8a647ef3f03b9d1109a464a51cd06e90de703d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp
@@ -0,0 +1,92 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_auto_1d_left ) { // test_auto_1d with LayoutLeft on Kokkos::Cuda
+  TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Cuda >();
+}
+
+TEST_F( cuda, view_subview_auto_1d_right ) { // test_auto_1d with LayoutRight on Kokkos::Cuda
+  TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Cuda >();
+}
+
+TEST_F( cuda, view_subview_auto_1d_stride ) { // test_auto_1d with LayoutStride on Kokkos::Cuda
+  TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Cuda >();
+}
+
+TEST_F( cuda, view_subview_assign_strided ) { // test_1d_strided_assignment on Kokkos::Cuda
+  TestViewSubview::test_1d_strided_assignment< Kokkos::Cuda >();
+}
+
+TEST_F( cuda, view_subview_left_0 ) { // test_left_0 instantiated with Kokkos::CudaUVMSpace rather than Kokkos::Cuda
+  TestViewSubview::test_left_0< Kokkos::CudaUVMSpace >();
+}
+
+TEST_F( cuda, view_subview_left_1 ) { // test_left_1 instantiated with Kokkos::CudaUVMSpace
+  TestViewSubview::test_left_1< Kokkos::CudaUVMSpace >();
+}
+
+TEST_F( cuda, view_subview_left_2 ) { // test_left_2 instantiated with Kokkos::CudaUVMSpace
+  TestViewSubview::test_left_2< Kokkos::CudaUVMSpace >();
+}
+
+TEST_F( cuda, view_subview_left_3 ) { // test_left_3 instantiated with Kokkos::CudaUVMSpace
+  TestViewSubview::test_left_3< Kokkos::CudaUVMSpace >();
+}
+
+TEST_F( cuda, view_subview_right_0 ) { // test_right_0 instantiated with Kokkos::CudaUVMSpace
+  TestViewSubview::test_right_0< Kokkos::CudaUVMSpace >();
+}
+
+TEST_F( cuda, view_subview_right_1 ) { // test_right_1 instantiated with Kokkos::CudaUVMSpace
+  TestViewSubview::test_right_1< Kokkos::CudaUVMSpace >();
+}
+
+TEST_F( cuda, view_subview_right_3 ) { // test_right_3 instantiated with Kokkos::CudaUVMSpace
+  TestViewSubview::test_right_3< Kokkos::CudaUVMSpace >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..053fcfc2095c26540ff75e545bb4f920e0a96912
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp
@@ -0,0 +1,60 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test { // each test runs with default, Atomic and RandomAccess memory traits
+
+TEST_F( cuda, view_subview_layoutleft_to_layoutleft) {
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda >();
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+TEST_F( cuda, view_subview_layoutright_to_layoutright) {
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda >();
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c5f2ef72fdd45b2b9033d54c3c83e70c3c089c1
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_1d_assign ) {
+  TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..aee6f1730d6fb33e15877a043fe0ef8beaed11d9
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_1d_assign_atomic ) {
+  TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2ef48c686e1d3a202aaf5f017d9ac88cc486085d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_1d_assign_randomaccess ) {
+  TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_Singleton.hpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp
similarity index 89%
rename from lib/kokkos/core/src/impl/Kokkos_Singleton.hpp
rename to lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp
index 86bc94ab0be9e8cfd00ea5a95cebc906bd3aa312..aec123ac235ef631172b3dc7c26151d2da7e38da 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Singleton.hpp
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,20 +36,17 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
+#include <cuda/TestCuda.hpp>
 
-#ifndef KOKKOS_SINGLETON_HPP
-#define KOKKOS_SINGLETON_HPP
-
-#include <Kokkos_Macros.hpp>
-#include <cstddef>
-
-namespace Kokkos { namespace Impl {
+namespace Test {
 
+TEST_F( cuda, view_subview_2d_from_3d ) {
+  TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace >();
+}
 
-}} // namespace Kokkos::Impl
+} // namespace Test
 
-#endif // KOKKOS_SINGLETON_HPP
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e8ad2319963b2750e01d518309e84c7423a387d6
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_2d_from_3d_atomic ) {
+  TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e86b4513fd8b8fdeb85c7bce130b3ae274d5e214
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_2d_from_3d_randomaccess ) {
+  TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ad9dcc0fd1faccf2c8f8ff5e254b82a33f9d998b
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_3d_from_5d_left ) {
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f97d97e59c205fda791ac1d231b1429e1f8d4ec2
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_3d_from_5d_left_atomic ) {
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2a07f28f830a125d865eb89a4a456cb5d0aa2b62
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_3d_from_5d_left_randomaccess ) {
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3c51d9420184c91d8ddc1b15e9fb50659c1651d6
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_3d_from_5d_right ) {
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..835caa7b879891ed4cd0d24bac61bdaf6a686efb
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_3d_from_5d_right_atomic ) {
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..53bd5eee20205d56ca4356df4f2bb1118e0ff93d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_subview_3d_from_5d_right_randomaccess ) {
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e4348319f695da2819e24143754777746bdc35d6
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp
@@ -0,0 +1,12 @@
+#include<cuda/TestCuda_SubView_c01.cpp>
+#include<cuda/TestCuda_SubView_c02.cpp>
+#include<cuda/TestCuda_SubView_c03.cpp>
+#include<cuda/TestCuda_SubView_c04.cpp>
+#include<cuda/TestCuda_SubView_c05.cpp>
+#include<cuda/TestCuda_SubView_c06.cpp>
+#include<cuda/TestCuda_SubView_c07.cpp>
+#include<cuda/TestCuda_SubView_c08.cpp>
+#include<cuda/TestCuda_SubView_c09.cpp>
+#include<cuda/TestCuda_SubView_c10.cpp>
+#include<cuda/TestCuda_SubView_c11.cpp>
+#include<cuda/TestCuda_SubView_c12.cpp>
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..800a458af918c9a1bca1f4c3d6816c7a3c9b4403
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp
@@ -0,0 +1,120 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda , team_tag )  // Runs TestTeamPolicy test_for/test_reduce on Kokkos::Cuda with Static and Dynamic schedules.
+{
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(0);  // argument 0 -- presumably the empty-size edge case; confirm in TestTeam.hpp
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0);
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0);
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0);
+
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(2);  // same four combinations with argument 2
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2);
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2);
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2);
+
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);  // same four combinations with argument 1000
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000);
+  TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000);
+}
+
+TEST_F( cuda , team_shared_request) {  // Runs TestSharedTeam on Kokkos::Cuda, once per schedule kind.
+  TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >();
+  TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+
+// This test requests too much L0 scratch, so it is left disabled.
+//TEST_F( cuda, team_scratch_request) {
+//  TestScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >();
+//  TestScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >();
+//}
+
+#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)  // this test is compiled only when the macro is defined
+TEST_F( cuda , team_lambda_shared_request) {  // Runs TestLambdaSharedTeam for three Cuda memory spaces, Static then Dynamic schedule.
+  TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >();
+  TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >();
+  TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static>  >();
+  TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >();
+  TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >();
+  TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic>  >();
+}
+#endif  // KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
+
+TEST_F( cuda, shmem_size) {  // Runs TestShmemSize with Kokkos::Cuda.
+  TestShmemSize< Kokkos::Cuda >();
+}
+
+TEST_F( cuda, multi_level_scratch) {  // Runs TestMultiLevelScratchTeam on Kokkos::Cuda, once per schedule kind.
+  TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >();
+  TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+
+TEST_F( cuda , team_vector )  // Requires TestTeamVector::Test< Kokkos::Cuda > to return true for every argument 0 through 10.
+{
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(0) ) );  // ASSERT_TRUE stops this test at the first failing case
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(1) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(2) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(3) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(4) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(5) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(6) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(7) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(8) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(9) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(10) ) );
+}
+
+TEST_F( cuda, triple_nested_parallelism )  // Runs TestTripleNestedReduce< double > on Kokkos::Cuda three times.
+{
+  TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 32 );  // only the last two arguments vary between the calls
+  TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 16 );
+  TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 16 , 16 );
+}
+
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c01ca1c1463c6573c8d9e51c0ca31ed43c19941e
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp
@@ -0,0 +1,59 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda , impl_view_mapping_a ) {  // View-mapping checks instantiated for Kokkos::CudaSpace.
+  test_view_mapping< Kokkos::CudaSpace >();
+  test_view_mapping_operator< Kokkos::CudaSpace >();
+}
+
+TEST_F( cuda , view_of_class )  // Runs TestViewMappingClassValue for CudaSpace and CudaUVMSpace.
+{
+  TestViewMappingClassValue< Kokkos::CudaSpace >::run();
+  TestViewMappingClassValue< Kokkos::CudaUVMSpace >::run();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8e821ada000678c762b22db574dd1e0d816bbd54
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp
@@ -0,0 +1,53 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda , impl_view_mapping_d ) {  // View-mapping checks instantiated for Kokkos::CudaHostPinnedSpace.
+  test_view_mapping< Kokkos::CudaHostPinnedSpace >();
+  test_view_mapping_operator< Kokkos::CudaHostPinnedSpace >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cf29a68e96586dc5d194bd0b28338259784dceb0
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp
@@ -0,0 +1,53 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda , impl_view_mapping_c ) {  // View-mapping checks instantiated for Kokkos::CudaUVMSpace.
+  test_view_mapping< Kokkos::CudaUVMSpace >();
+  test_view_mapping_operator< Kokkos::CudaUVMSpace >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..db14b5158f6efa01a6397df98041827a830158d4
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp
@@ -0,0 +1,112 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda , view_nested_view )  // Delegates to the ::Test::view_nested_view helper, instantiated for Kokkos::Cuda.
+{
+  ::Test::view_nested_view< Kokkos::Cuda >();
+}
+
+
+
+TEST_F( cuda , view_remap )  // deep_copy from an int/LayoutLeft view into a double/LayoutRight view must keep every element at the same indices.
+{
+  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };  // extents of the four dimensions
+
+  typedef Kokkos::View< double*[N1][N2][N3] ,
+                             Kokkos::LayoutRight ,
+                             Kokkos::CudaUVMSpace > output_type ;  // destination: double values, LayoutRight
+
+  typedef Kokkos::View< int**[N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::CudaUVMSpace > input_type ;  // source: int values, LayoutLeft, two runtime extents
+
+  typedef Kokkos::View< int*[N0][N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::CudaUVMSpace > diff_type ;  // second extent is N0 rather than N1, so its shape differs from input_type
+
+  output_type output( "output" , N0 );
+  input_type  input ( "input" , N0 , N1 );
+  diff_type   diff  ( "diff" , N0 );  // only referenced by the disabled deep_copy further down
+
+  Kokkos::fence();
+  int value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {  // i0 is the innermost loop, so value grows fastest along the first index
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    input(i0,i1,i2,i3) = ++value ;  // element values run 1, 2, 3, ... in this visiting order
+  }}}}
+  Kokkos::fence();
+
+  // Kokkos::deep_copy( diff , input ); // disabled: would throw because the shapes are incompatible
+  Kokkos::deep_copy( output , input );
+ // Verification: replay the same index order and compare every element of output.
+  Kokkos::fence();
+  value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    ++value ;
+    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );  // output holds double; cast back to int for the comparison
+  }}}}
+  Kokkos::fence();
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( cuda , view_aggregate )  // Runs TestViewAggregate with Kokkos::Cuda.
+{
+  TestViewAggregate< Kokkos::Cuda >();
+}
+
+TEST_F( cuda , template_meta_functions )  // Runs TestTemplateMetaFunctions with int and Kokkos::Cuda.
+{
+  TestTemplateMetaFunctions<int, Kokkos::Cuda >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..07d425647330228815a7103e6f7596a8a2f2a460
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp
@@ -0,0 +1,63 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda , impl_shared_alloc ) {  // test_shared_alloc for each Cuda memory space, paired with HostSpace's execution space.
+  test_shared_alloc< Kokkos::CudaSpace , Kokkos::HostSpace::execution_space >();
+  test_shared_alloc< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >();
+  test_shared_alloc< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >();
+}
+
+TEST_F( cuda , impl_view_mapping_b ) {  // Subview-mapping and atomic-mapping checks for all three Cuda memory spaces.
+  test_view_mapping_subview< Kokkos::CudaSpace >();
+  test_view_mapping_subview< Kokkos::CudaUVMSpace >();
+  test_view_mapping_subview< Kokkos::CudaHostPinnedSpace >();
+  TestViewMappingAtomic< Kokkos::CudaSpace >::run();  // the atomic-mapping checks cover the same three spaces
+  TestViewMappingAtomic< Kokkos::CudaUVMSpace >::run();
+  TestViewMappingAtomic< Kokkos::CudaHostPinnedSpace >::run();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..34721f02dc73f418ba7c348fe65c3a59d534dc7c
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp
@@ -0,0 +1,55 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_api_a) {  // Runs TestViewAPI< double > with Kokkos::Cuda.
+  typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess > > view_texture_managed ;  // NOTE(review): not referenced anywhere in this test body
+  typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess | Kokkos::Unmanaged > > view_texture_unmanaged ;  // NOTE(review): not referenced anywhere in this test body
+
+  TestViewAPI< double , Kokkos::Cuda >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..abbcf3bf8bfa6d89ff5c5a5891d8cd16018becf0
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_api_b) {  // Runs TestViewAPI< double > with Kokkos::CudaUVMSpace.
+  TestViewAPI< double , Kokkos::CudaUVMSpace >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9899642035ada183fe7b7b5c4a60610e3c271739
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda, view_api_c) {  // Runs TestViewAPI< double > with Kokkos::CudaHostPinnedSpace.
+  TestViewAPI< double , Kokkos::CudaHostPinnedSpace >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..01324a1eeb82f21802a1055a0c42609e0b1e5c44
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp
@@ -0,0 +1,116 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_TEST_OPENMPHPP
+#define KOKKOS_TEST_OPENMPHPP
+#include <gtest/gtest.h>
+
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_LAMBDA
+#undef KOKKOS_LAMBDA
+#endif
+#define KOKKOS_LAMBDA [=]
+
+#include <Kokkos_Core.hpp>
+
+#include <TestTile.hpp>
+
+//----------------------------------------------------------------------------
+
+#include <TestSharedAlloc.hpp>
+#include <TestViewMapping.hpp>
+
+
+#include <TestViewAPI.hpp>
+#include <TestViewOfClass.hpp>
+#include <TestViewSubview.hpp>
+#include <TestAtomic.hpp>
+#include <TestAtomicOperations.hpp>
+#include <TestRange.hpp>
+#include <TestTeam.hpp>
+#include <TestReduce.hpp>
+#include <TestScan.hpp>
+#include <TestAggregate.hpp>
+#include <TestCompilerMacros.hpp>
+#include <TestTaskScheduler.hpp>
+#include <TestMemoryPool.hpp>
+
+
+#include <TestCXX11.hpp>
+#include <TestCXX11Deduction.hpp>
+#include <TestTeamVector.hpp>
+#include <TestTemplateMetaFunctions.hpp>
+
+#include <TestPolicyConstruction.hpp>
+
+#include <TestMDRange.hpp>
+
+namespace Test {
+
+class openmp : public ::testing::Test {  // gtest fixture for the OpenMP tests: starts Kokkos::OpenMP in SetUpTestCase and shuts it down in TearDownTestCase.
+protected:
+  static void SetUpTestCase()  // queries the hardware topology via Kokkos::hwloc and initializes Kokkos::OpenMP
+  {
+    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
+    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
+    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
+    // Thread count = max(1, NUMA count) * max(2, half of cores_per_numa * threads_per_core).
+    const unsigned threads_count = std::max( 1u , numa_count ) *
+                                   std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 );
+
+    Kokkos::OpenMP::initialize( threads_count );
+    Kokkos::OpenMP::print_configuration( std::cout , true );
+    srand(10231);  // fixed seed for rand()
+  }
+
+  static void TearDownTestCase()  // finalizes Kokkos::OpenMP, then resets the OpenMP thread count to 1
+  {
+    Kokkos::OpenMP::finalize();
+
+    omp_set_num_threads(1);
+
+    ASSERT_EQ( 1 , omp_get_max_threads() );  // confirms that the reset above took effect
+  }
+};
+
+}
+#endif
diff --git a/lib/kokkos/core/unit_test/TestOpenMP.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp
similarity index 80%
rename from lib/kokkos/core/unit_test/TestOpenMP.cpp
rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp
index 6e8fc4517917bfcaaeecba6fbc2ac59f6090350d..91722c8490be3ad33a635359c449eaa3df993369 100644
--- a/lib/kokkos/core/unit_test/TestOpenMP.cpp
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,118 +36,14 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
-
-#include <gtest/gtest.h>
-
-#include <Kokkos_Macros.hpp>
-#ifdef KOKKOS_LAMBDA
-#undef KOKKOS_LAMBDA
-#endif
-#define KOKKOS_LAMBDA [=]
-
-#include <Kokkos_Core.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestViewImpl.hpp>
-#include <TestAtomic.hpp>
-#include <TestAtomicOperations.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestMemoryPool.hpp>
-
-
-#include <TestCXX11.hpp>
-#include <TestCXX11Deduction.hpp>
-#include <TestTeamVector.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-
-#include <TestPolicyConstruction.hpp>
-
-#include <TestMDRange.hpp>
+#include <openmp/TestOpenMP.hpp>
 
 namespace Test {
 
-class openmp : public ::testing::Test {
-protected:
-  static void SetUpTestCase()
-  {
-    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
-    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
-    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
-
-    const unsigned threads_count = std::max( 1u , numa_count ) *
-                                   std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 );
-
-    Kokkos::OpenMP::initialize( threads_count );
-    Kokkos::OpenMP::print_configuration( std::cout , true );
-    srand(10231);
-  }
-
-  static void TearDownTestCase()
-  {
-    Kokkos::OpenMP::finalize();
-
-    omp_set_num_threads(1);
-
-    ASSERT_EQ( 1 , omp_get_max_threads() );
-  }
-};
-
-
-TEST_F( openmp , md_range ) {
-  TestMDRange_2D< Kokkos::OpenMP >::test_for2(100,100);
-
-  TestMDRange_3D< Kokkos::OpenMP >::test_for3(100,100,100);
-}
-
-TEST_F( openmp , impl_shared_alloc ) {
-  test_shared_alloc< Kokkos::HostSpace , Kokkos::OpenMP >();
-}
-
-TEST_F( openmp, policy_construction) {
-  TestRangePolicyConstruction< Kokkos::OpenMP >();
-  TestTeamPolicyConstruction< Kokkos::OpenMP >();
-}
-
-TEST_F( openmp , impl_view_mapping ) {
-  test_view_mapping< Kokkos::OpenMP >();
-  test_view_mapping_subview< Kokkos::OpenMP >();
-  test_view_mapping_operator< Kokkos::OpenMP >();
-  TestViewMappingAtomic< Kokkos::OpenMP >::run();
-}
-
-TEST_F( openmp, view_impl) {
-  test_view_impl< Kokkos::OpenMP >();
-}
-
-TEST_F( openmp, view_api) {
-  TestViewAPI< double , Kokkos::OpenMP >();
-}
-
-TEST_F( openmp , view_nested_view )
-{
-  ::Test::view_nested_view< Kokkos::OpenMP >();
-}
-
 TEST_F( openmp , atomics )
 {
   const int loop_count = 1e4 ;
@@ -204,6 +100,8 @@ TEST_F( openmp , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 2 ) ) );
@@ -214,6 +112,8 @@ TEST_F( openmp , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 2 ) ) );
@@ -224,6 +124,8 @@ TEST_F( openmp , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 2 ) ) );
@@ -234,6 +136,8 @@ TEST_F( openmp , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 2 ) ) );
@@ -244,6 +148,8 @@ TEST_F( openmp , atomic_operations )
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 7 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 8 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 12 ) ) );
 
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 1 ) ) );
     ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 2 ) ) );
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c6910363515a0c2d0ec0531ba1f643f8afaf1983
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp
@@ -0,0 +1,189 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Empty test body: running it exercises nothing beyond the openmp fixture itself.
+TEST_F( openmp , init ) {
+  ;
+}
+// Runs the 2D and 3D MDRange for-tests with an argument of 100 per dimension.
+TEST_F( openmp , md_range ) {
+  TestMDRange_2D< Kokkos::OpenMP >::test_for2(100,100);
+
+  TestMDRange_3D< Kokkos::OpenMP >::test_for3(100,100,100);
+}
+// Invokes the RangePolicy and TeamPolicy construction tests for OpenMP.
+TEST_F( openmp, policy_construction) {
+  TestRangePolicyConstruction< Kokkos::OpenMP >();
+  TestTeamPolicyConstruction< Kokkos::OpenMP >();
+}
+// TestRange for/reduce/scan (plus test_dynamic_policy for Dynamic) under Static and Dynamic schedules.
+TEST_F( openmp , range_tag )
+{
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(0);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(0);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(0);
+
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(2);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(2);
+
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(3);
+
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000);
+
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001);
+  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); // NOTE(review): 1000 here vs 1001 on the three Dynamic calls above - confirm intended
+}
+
+
+//----------------------------------------------------------------------------
+// Passes when TestCompilerMacros::Test returns true for OpenMP.
+TEST_F( openmp , compiler_macros )
+{
+  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::OpenMP >() ) );
+}
+
+//----------------------------------------------------------------------------
+// Memory pool tests; only the boolean returned by test_mempool is asserted in this body.
+TEST_F( openmp , memory_pool )
+{
+  bool val = TestMemoryPool::test_mempool< Kokkos::OpenMP >( 128, 128000000 );
+  ASSERT_TRUE( val );
+
+  TestMemoryPool::test_mempool2< Kokkos::OpenMP >( 64, 4, 1000000, 2000000 );
+
+  TestMemoryPool::test_memory_exhaustion< Kokkos::OpenMP >();
+}
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_ENABLE_TASKDAG )
+// Runs TestFib for i = 0..24; the second argument grows as (i+1)*(i+1)*10000.
+TEST_F( openmp , task_fib )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestFib< Kokkos::OpenMP >::run(i, (i+1)*(i+1)*10000 );
+  }
+}
+// Runs TestTaskDependence for i = 0..24.
+TEST_F( openmp , task_depend )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestTaskDependence< Kokkos::OpenMP >::run(i);
+  }
+}
+// Runs TestTaskTeam with argument 1000; the TestTaskTeamValue call below is commented out.
+TEST_F( openmp , task_team )
+{
+  TestTaskScheduler::TestTaskTeam< Kokkos::OpenMP >::run(1000);
+  //TestTaskScheduler::TestTaskTeamValue< Kokkos::OpenMP >::run(1000); //put back after testing
+}
+
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
+
+//----------------------------------------------------------------------------
+// Compiled only with KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP; asserts run only if OpenMP is the default execution space.
+#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
+TEST_F( openmp , cxx11 )
+{
+  if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::OpenMP >::value ) {
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(1) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(2) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(3) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(4) ) );
+  }
+}
+#endif
+// Runs TestTile::test for template argument pairs from 1,1 up to 8,8 with several call-argument pairs.
+TEST_F( openmp, tile_layout )
+{
+  TestTile::test< Kokkos::OpenMP , 1 , 1 >( 1 , 1 );
+  TestTile::test< Kokkos::OpenMP , 1 , 1 >( 2 , 3 );
+  TestTile::test< Kokkos::OpenMP , 1 , 1 >( 9 , 10 );
+
+  TestTile::test< Kokkos::OpenMP , 2 , 2 >( 1 , 1 );
+  TestTile::test< Kokkos::OpenMP , 2 , 2 >( 2 , 3 );
+  TestTile::test< Kokkos::OpenMP , 2 , 2 >( 4 , 4 );
+  TestTile::test< Kokkos::OpenMP , 2 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::OpenMP , 2 , 4 >( 9 , 9 );
+  TestTile::test< Kokkos::OpenMP , 4 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::OpenMP , 4 , 4 >( 1 , 1 );
+  TestTile::test< Kokkos::OpenMP , 4 , 4 >( 4 , 4 );
+  TestTile::test< Kokkos::OpenMP , 4 , 4 >( 9 , 9 );
+  TestTile::test< Kokkos::OpenMP , 4 , 4 >( 9 , 11 );
+
+  TestTile::test< Kokkos::OpenMP , 8 , 8 >( 1 , 1 );
+  TestTile::test< Kokkos::OpenMP , 8 , 8 >( 4 , 4 );
+  TestTile::test< Kokkos::OpenMP , 8 , 8 >( 9 , 9 );
+  TestTile::test< Kokkos::OpenMP , 8 , 8 >( 9 , 11 );
+}
+
+// Dispatches an empty-bodied parallel_for over RangePolicy(0,j) for j = 0..99, repeated 100 times.
+TEST_F( openmp , dispatch )
+{
+  const int repeat = 100 ;
+  for ( int i = 0 ; i < repeat ; ++i ) {
+  for ( int j = 0 ; j < repeat ; ++j ) {
+    Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::OpenMP >(0,j)
+                        , KOKKOS_LAMBDA( int ) {} );
+  }}
+}
+
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d41e1493eea6306d68087d1a8562ab963e1ec039
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp
@@ -0,0 +1,138 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// TestReduce instantiated with long, for arguments 0 and 1000000.
+TEST_F( openmp, long_reduce) {
+  TestReduce< long ,   Kokkos::OpenMP >( 0 );
+  TestReduce< long ,   Kokkos::OpenMP >( 1000000 );
+}
+// Same calls as long_reduce, instantiated with double.
+TEST_F( openmp, double_reduce) {
+  TestReduce< double ,   Kokkos::OpenMP >( 0 );
+  TestReduce< double ,   Kokkos::OpenMP >( 1000000 );
+}
+// TestReducers: execute_integer for int and size_t, execute_float for double, execute_basic for complex<double>.
+TEST_F( openmp , reducers )
+{
+  TestReducers<int, Kokkos::OpenMP>::execute_integer();
+  TestReducers<size_t, Kokkos::OpenMP>::execute_integer();
+  TestReducers<double, Kokkos::OpenMP>::execute_float();
+  TestReducers<Kokkos::complex<double>, Kokkos::OpenMP>::execute_basic();
+}
+// TestReduceDynamic instantiated with long, for arguments 0 and 1000000.
+TEST_F( openmp, long_reduce_dynamic ) {
+  TestReduceDynamic< long ,   Kokkos::OpenMP >( 0 );
+  TestReduceDynamic< long ,   Kokkos::OpenMP >( 1000000 );
+}
+// Same calls as long_reduce_dynamic, instantiated with double.
+TEST_F( openmp, double_reduce_dynamic ) {
+  TestReduceDynamic< double ,   Kokkos::OpenMP >( 0 );
+  TestReduceDynamic< double ,   Kokkos::OpenMP >( 1000000 );
+}
+// TestReduceDynamicView instantiated with long, for arguments 0 and 1000000.
+TEST_F( openmp, long_reduce_dynamic_view ) {
+  TestReduceDynamicView< long ,   Kokkos::OpenMP >( 0 );
+  TestReduceDynamicView< long ,   Kokkos::OpenMP >( 1000000 );
+}
+// TestScan: test_range( 1 , 1000 ), then arguments 0, 100000 and 10000000, followed by an OpenMP fence.
+TEST_F( openmp , scan )
+{
+  TestScan< Kokkos::OpenMP >::test_range( 1 , 1000 );
+  TestScan< Kokkos::OpenMP >( 0 );
+  TestScan< Kokkos::OpenMP >( 100000 );
+  TestScan< Kokkos::OpenMP >( 10000000 );
+  Kokkos::OpenMP::fence();
+}
+// Disabled via #if 0: TestScan variant parameterized on Kokkos::Impl::OpenMPExecUseScanSmall.
+#if 0
+TEST_F( openmp , scan_small )
+{
+  typedef TestScan< Kokkos::OpenMP , Kokkos::Impl::OpenMPExecUseScanSmall > TestScanFunctor ;
+  for ( int i = 0 ; i < 1000 ; ++i ) {
+    TestScanFunctor( 10 );
+    TestScanFunctor( 10000 );
+  }
+  TestScanFunctor( 1000000 );
+  TestScanFunctor( 10000000 );
+
+  Kokkos::OpenMP::fence();
+}
+#endif
+// TestScanTeam under Static and Dynamic schedules, for arguments 0, 10 and 10000.
+TEST_F( openmp  , team_scan )
+{
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10 );
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10000 );
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
+}
+// TestReduceTeam instantiated with long, under Static and Dynamic schedules, for arguments 0, 3 and 100000.
+TEST_F( openmp , team_long_reduce) {
+  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+// Same calls as team_long_reduce, instantiated with double.
+TEST_F( openmp , team_double_reduce) {
+  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+// Runs TestCXX11::test_reduction_deduction for OpenMP.
+TEST_F( openmp , reduction_deduction )
+{
+  TestCXX11::test_reduction_deduction< Kokkos::OpenMP >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/TestOpenMP_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp
similarity index 70%
rename from lib/kokkos/core/unit_test/TestOpenMP_a.cpp
rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp
index 64eac66804b1ef6a053930d6db47abb566ccda66..9854417e42da5a8bdd6986b85fbdd754bab3e57b 100644
--- a/lib/kokkos/core/unit_test/TestOpenMP_a.cpp
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,60 +36,14 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
-
-#include <gtest/gtest.h>
-
-#include <Kokkos_Macros.hpp>
-#ifdef KOKKOS_LAMBDA
-#undef KOKKOS_LAMBDA
-#endif
-#define KOKKOS_LAMBDA [=]
-
-#include <Kokkos_Core.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestViewImpl.hpp>
-#include <TestAtomic.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestMemoryPool.hpp>
-
-
-#include <TestCXX11.hpp>
-#include <TestCXX11Deduction.hpp>
-#include <TestTeamVector.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-
-#include <TestPolicyConstruction.hpp>
-
+#include <openmp/TestOpenMP.hpp>
 
 namespace Test {
 
-class openmp : public ::testing::Test {
-protected:
-  static void SetUpTestCase();
-  static void TearDownTestCase();
-};
-
 TEST_F( openmp, view_subview_auto_1d_left ) {
   TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::OpenMP >();
 }
@@ -134,17 +88,5 @@ TEST_F( openmp, view_subview_right_3 ) {
   TestViewSubview::test_right_3< Kokkos::OpenMP >();
 }
 
-TEST_F( openmp, view_subview_1d_assign ) {
-  TestViewSubview::test_1d_assign< Kokkos::OpenMP >();
-}
-
-TEST_F( openmp, view_subview_2d_from_3d ) {
-  TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP >();
-}
-
-TEST_F( openmp, view_subview_2d_from_5d ) {
-  TestViewSubview::test_2d_subview_5d< Kokkos::OpenMP >();
-}
-
 } // namespace test
 
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2aa1fc5c633ffab0319c37c7a00a9abe48438597
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp
@@ -0,0 +1,60 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// test_layoutleft_to_layoutleft without memory traits, then with Atomic and RandomAccess memory traits.
+TEST_F( openmp, view_subview_layoutleft_to_layoutleft) {
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP >();
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+// test_layoutright_to_layoutright without memory traits, then with Atomic and RandomAccess memory traits.
+TEST_F( openmp, view_subview_layoutright_to_layoutright) {
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP >();
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1a6871cfca8f3136b13011f66576cd7a9d891978
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Runs TestViewSubview::test_1d_assign for OpenMP (no memory traits argument).
+TEST_F( openmp, view_subview_1d_assign ) {
+  TestViewSubview::test_1d_assign< Kokkos::OpenMP >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b04edbb997d564a2e921bacf7b36959b17e8755f
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Runs TestViewSubview::test_1d_assign for OpenMP with Kokkos::MemoryTraits<Kokkos::Atomic>.
+TEST_F( openmp, view_subview_1d_assign_atomic ) {
+  TestViewSubview::test_1d_assign< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..765e235830db2f7e48ad8fe9df271429fef2c2ab
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Fixture openmp: TestViewSubview::test_1d_assign for Kokkos::OpenMP with MemoryTraits<Kokkos::RandomAccess>.
+TEST_F( openmp, view_subview_1d_assign_randomaccess ) {
+  TestViewSubview::test_1d_assign< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9d8b62708a3d4d898ddbc923b733c78c869c2826
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Fixture openmp: TestViewSubview::test_2d_subview_3d for Kokkos::OpenMP, no MemoryTraits argument given.
+TEST_F( openmp, view_subview_2d_from_3d ) {
+  TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9c19cf0e57dcf7058f4f0aeb4752465c470e9fa9
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Fixture openmp: TestViewSubview::test_2d_subview_3d for Kokkos::OpenMP with MemoryTraits<Kokkos::Atomic>.
+TEST_F( openmp, view_subview_2d_from_3d_atomic ) {
+  TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c1bdf72351b02958f5e1e857c41f7e5d999ade64
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Fixture openmp: TestViewSubview::test_2d_subview_3d for Kokkos::OpenMP with MemoryTraits<Kokkos::RandomAccess>.
+TEST_F( openmp, view_subview_2d_from_3d_randomaccess ) {
+  TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..08a3b5a54a2c66599ebc61384357324a79815507
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Fixture openmp: TestViewSubview::test_3d_subview_5d_left for Kokkos::OpenMP, no MemoryTraits argument given.
+TEST_F( openmp, view_subview_3d_from_5d_left ) {
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0864ebbdaa44b1bd00a154fe2f7fcf4b55ae48eb
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Fixture openmp: TestViewSubview::test_3d_subview_5d_left for Kokkos::OpenMP with MemoryTraits<Kokkos::Atomic>.
+TEST_F( openmp, view_subview_3d_from_5d_left_atomic ) {
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e38dfecbf6e353bcab69f7341d2754ea6ef85cf9
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Fixture openmp: TestViewSubview::test_3d_subview_5d_left for Kokkos::OpenMP with MemoryTraits<Kokkos::RandomAccess>.
+TEST_F( openmp, view_subview_3d_from_5d_left_randomaccess ) {
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp
similarity index 89%
rename from lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp
rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp
index 61d2e35702f998a83e0796e7d291dff7e3466dd4..b7e4683d23d18bb838c97a1fa198b2d38874de77 100644
--- a/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,21 +36,17 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
+#include <openmp/TestOpenMP.hpp>
 
-#ifndef KOKKOS_VIEWTILELEFT_HPP
-#define KOKKOS_VIEWTILELEFT_HPP
-
-#include <impl/KokkosExp_ViewTile.hpp>
-
-namespace Kokkos {
-
-using Kokkos::Experimental::tile_subview ;
+namespace Test {
 
+TEST_F( openmp, view_subview_3d_from_5d_right ) { // Kokkos::OpenMP, no MemoryTraits argument given
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP >();
 }
 
-#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */
+} // namespace Test
 
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fc3e66fd4853c6104503aaf461eda97183cb44e1
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Fixture openmp: TestViewSubview::test_3d_subview_5d_right for Kokkos::OpenMP with MemoryTraits<Kokkos::Atomic>.
+TEST_F( openmp, view_subview_3d_from_5d_right_atomic ) {
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e21a13ee579e5052241252ffa6b99ba49f9c6b47
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+// Fixture openmp: TestViewSubview::test_3d_subview_5d_right for Kokkos::OpenMP with MemoryTraits<Kokkos::RandomAccess>.
+TEST_F( openmp, view_subview_3d_from_5d_right_randomaccess ) {
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9da159ab5773a0a7b1a49605cf1a88294a29d09d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp
@@ -0,0 +1,12 @@
+#include<openmp/TestOpenMP_SubView_c01.cpp>
+#include<openmp/TestOpenMP_SubView_c02.cpp>
+#include<openmp/TestOpenMP_SubView_c03.cpp>
+#include<openmp/TestOpenMP_SubView_c04.cpp>
+#include<openmp/TestOpenMP_SubView_c05.cpp>
+#include<openmp/TestOpenMP_SubView_c06.cpp>
+#include<openmp/TestOpenMP_SubView_c07.cpp>
+#include<openmp/TestOpenMP_SubView_c08.cpp>
+#include<openmp/TestOpenMP_SubView_c09.cpp>
+#include<openmp/TestOpenMP_SubView_c10.cpp>
+#include<openmp/TestOpenMP_SubView_c11.cpp>
+#include<openmp/TestOpenMP_SubView_c12.cpp>
diff --git a/lib/kokkos/core/unit_test/TestOpenMP_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp
similarity index 52%
rename from lib/kokkos/core/unit_test/TestOpenMP_b.cpp
rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp
index 6cc2476014c8d8e07ef6bc6a60b38c3660d3d7c4..1539e30e1936998c8ea389144c0617b468ab5181 100644
--- a/lib/kokkos/core/unit_test/TestOpenMP_b.cpp
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp
@@ -40,122 +40,29 @@
 // ************************************************************************
 //@HEADER
 */
-
-#include <gtest/gtest.h>
-
-#include <Kokkos_Macros.hpp>
-#ifdef KOKKOS_LAMBDA
-#undef KOKKOS_LAMBDA
-#endif
-#define KOKKOS_LAMBDA [=]
-
-#include <Kokkos_Core.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestViewImpl.hpp>
-#include <TestAtomic.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestMemoryPool.hpp>
-
-
-#include <TestCXX11.hpp>
-#include <TestCXX11Deduction.hpp>
-#include <TestTeamVector.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestTemplateMetaFunctions.hpp>
-
-#include <TestPolicyConstruction.hpp>
-
+#include <openmp/TestOpenMP.hpp>
 
 namespace Test {
 
-class openmp : public ::testing::Test {
-protected:
-  static void SetUpTestCase();
-  static void TearDownTestCase();
-};
-
-TEST_F( openmp , range_tag )
-{
-  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
-  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
-  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000);
-  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001);
-  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001);
-  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001);
-  TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000);
-}
-
 TEST_F( openmp , team_tag )
 {
+  TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(0); // NOTE(review): 0 is presumably the zero-size case - confirm in TestTeam.hpp
+  TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0);
+  TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0);
+  TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0);
+  // Same Static/Dynamic test_for + test_reduce calls, with argument 2.
   TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(2);
   TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2);
   TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2);
   TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2);
+  // Same Static/Dynamic test_for + test_reduce calls, with argument 1000.
   TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
   TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
   TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000);
   TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000);
 }
 
-TEST_F( openmp, long_reduce) {
-  TestReduce< long ,   Kokkos::OpenMP >( 1000000 );
-}
-
-TEST_F( openmp, double_reduce) {
-  TestReduce< double ,   Kokkos::OpenMP >( 1000000 );
-}
-
-TEST_F( openmp, long_reduce_dynamic ) {
-  TestReduceDynamic< long ,   Kokkos::OpenMP >( 1000000 );
-}
-
-TEST_F( openmp, double_reduce_dynamic ) {
-  TestReduceDynamic< double ,   Kokkos::OpenMP >( 1000000 );
-}
-
-TEST_F( openmp, long_reduce_dynamic_view ) {
-  TestReduceDynamicView< long ,   Kokkos::OpenMP >( 1000000 );
-}
-
-TEST_F( openmp , reducers )
-{
-  TestReducers<int, Kokkos::OpenMP>::execute_integer();
-  TestReducers<size_t, Kokkos::OpenMP>::execute_integer();
-  TestReducers<double, Kokkos::OpenMP>::execute_float();
-  TestReducers<Kokkos::complex<double>, Kokkos::OpenMP>::execute_basic();
-}
-
-TEST_F( openmp, team_long_reduce) {
-  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 );
-  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
-  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 );
-  TestReduceTeam< long ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
-}
-
-TEST_F( openmp, team_double_reduce) {
-  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 );
-  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
-  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 );
-  TestReduceTeam< double ,   Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
-}
-
-TEST_F( openmp, team_shared_request) {
+TEST_F( openmp , team_shared_request) { // invokes TestSharedTeam once per schedule: Static, then Dynamic
   TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >();
   TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >();
 }
@@ -166,7 +73,7 @@ TEST_F( openmp, team_scratch_request) {
 }
 
 #if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
-TEST_F( openmp, team_lambda_shared_request) {
+TEST_F( openmp , team_lambda_shared_request) { // invokes TestLambdaSharedTeam (HostSpace, OpenMP) for Static, then Dynamic
   TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >();
   TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >();
 }
@@ -181,5 +88,35 @@ TEST_F( openmp, multi_level_scratch) {
   TestMultiLevelScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >();
 }
 
+TEST_F( openmp , team_vector )  // Asserts that TestTeamVector::Test< Kokkos::OpenMP > returns true for each argument 0 through 10.
+{
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(0) ) );  // argument presumably selects the sub-test to run — TODO confirm in TestTeamVector.hpp
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(1) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(2) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(3) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(4) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(5) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(6) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(7) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(8) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(9) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(10) ) );
+}
+
+#ifdef KOKKOS_COMPILER_GNU  // Decide whether the test below must be compiled out for this compiler.
+#if ( KOKKOS_COMPILER_GNU == 472 )  // presumably encodes GCC 4.7.2 — TODO confirm the macro's version encoding
+#define SKIP_TEST
+#endif  // KOKKOS_COMPILER_GNU == 472
+#endif  // KOKKOS_COMPILER_GNU
+
+#ifndef SKIP_TEST  // NOTE(review): SKIP_TEST is never #undef'd in the visible part of this file.
+TEST_F( openmp, triple_nested_parallelism )  // Runs TestTripleNestedReduce< double, Kokkos::OpenMP > with three argument sets.
+{
+  TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 32 , 32 );  // only the last two arguments vary between the calls
+  TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 32 , 16 );
+  TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 16 , 16 );
+}
+#endif  // SKIP_TEST
+
 } // namespace test
 
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..82cbf3ea18ecf7c3c424c73fe3e41ebf4a4e0c26
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp
@@ -0,0 +1,53 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+
+TEST_F( openmp , impl_view_mapping_a ) {  // Runs test_view_mapping and test_view_mapping_operator on Kokkos::OpenMP.
+  test_view_mapping< Kokkos::OpenMP >();
+  test_view_mapping_operator< Kokkos::OpenMP >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b2d4f87fdd417ab2d1036884dcce4b0df5793396
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp
@@ -0,0 +1,121 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <openmp/TestOpenMP.hpp>
+
+namespace Test {
+
+TEST_F( openmp , impl_shared_alloc ) {  // Runs test_shared_alloc with HostSpace memory and the OpenMP execution space.
+  test_shared_alloc< Kokkos::HostSpace , Kokkos::OpenMP >();
+}
+
+TEST_F( openmp , impl_view_mapping_b ) {  // Runs test_view_mapping_subview and TestViewMappingAtomic::run on Kokkos::OpenMP.
+  test_view_mapping_subview< Kokkos::OpenMP >();
+  TestViewMappingAtomic< Kokkos::OpenMP >::run();
+}
+
+TEST_F( openmp, view_api) {  // Instantiates TestViewAPI for double values on Kokkos::OpenMP.
+  TestViewAPI< double , Kokkos::OpenMP >();
+}
+
+TEST_F( openmp , view_nested_view )  // Calls the ::Test::view_nested_view helper for Kokkos::OpenMP.
+{
+  ::Test::view_nested_view< Kokkos::OpenMP >();
+}
+
+
+
+TEST_F( openmp , view_remap )  // Checks deep_copy from an int LayoutLeft view into a double LayoutRight view of the same extents.
+{
+  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
+
+  typedef Kokkos::View< double*[N1][N2][N3] ,
+                             Kokkos::LayoutRight ,
+                             Kokkos::OpenMP > output_type ;
+
+  typedef Kokkos::View< int**[N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::OpenMP > input_type ;
+
+  typedef Kokkos::View< int*[N0][N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::OpenMP > diff_type ;
+
+  output_type output( "output" , N0 );
+  input_type  input ( "input" , N0 , N1 );
+  diff_type   diff  ( "diff" , N0 );  // only referenced by the disabled deep_copy below
+
+  int value = 0 ;  // fill input with 1, 2, 3, ... with i0 varying fastest
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    input(i0,i1,i2,i3) = ++value ;
+  }}}}
+
+  // Kokkos::deep_copy( diff , input ); // disabled: throws because the shapes are incompatible
+  Kokkos::deep_copy( output , input );
+
+  value = 0 ;  // replay the same counter in the same loop order to verify the copy
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    ++value ;
+    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );  // each double element is cast back to int before comparing
+  }}}}
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( openmp , view_aggregate )  // Invokes TestViewAggregate for Kokkos::OpenMP.
+{
+  TestViewAggregate< Kokkos::OpenMP >();
+}
+
+TEST_F( openmp , template_meta_functions )  // Invokes TestTemplateMetaFunctions for int on Kokkos::OpenMP.
+{
+  TestTemplateMetaFunctions<int, Kokkos::OpenMP >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial.hpp b/lib/kokkos/core/unit_test/serial/TestSerial.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a966257fca62f727dd050ac8e9ba6f32cf6985ca
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial.hpp
@@ -0,0 +1,102 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_TEST_SERIALHPP
+#define KOKKOS_TEST_SERIALHPP
+#include <gtest/gtest.h>
+
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_LAMBDA  // Drop any earlier KOKKOS_LAMBDA definition, then redefine it below as a plain by-copy capture, [=].
+#undef KOKKOS_LAMBDA
+#endif
+#define KOKKOS_LAMBDA [=]
+
+#include <Kokkos_Core.hpp>
+
+#include <TestTile.hpp>
+
+//----------------------------------------------------------------------------
+
+#include <TestSharedAlloc.hpp>
+#include <TestViewMapping.hpp>
+
+
+#include <TestViewAPI.hpp>
+#include <TestViewOfClass.hpp>
+#include <TestViewSubview.hpp>
+#include <TestAtomic.hpp>
+#include <TestAtomicOperations.hpp>
+#include <TestRange.hpp>
+#include <TestTeam.hpp>
+#include <TestReduce.hpp>
+#include <TestScan.hpp>
+#include <TestAggregate.hpp>
+#include <TestCompilerMacros.hpp>
+#include <TestTaskScheduler.hpp>
+#include <TestMemoryPool.hpp>
+
+
+#include <TestCXX11.hpp>
+#include <TestCXX11Deduction.hpp>
+#include <TestTeamVector.hpp>
+#include <TestTemplateMetaFunctions.hpp>
+
+#include <TestPolicyConstruction.hpp>
+
+#include <TestMDRange.hpp>
+
+namespace Test {
+
+class serial : public ::testing::Test {  // GoogleTest fixture used by the TEST_F( serial , ... ) tests.
+protected:
+  static void SetUpTestCase()  // static fixture hook: initializes the execution space
+    {
+      Kokkos::HostSpace::execution_space::initialize();  // NOTE(review): names HostSpace's execution space, not Kokkos::Serial — confirm they coincide in every build configuration
+    }
+  static void TearDownTestCase()  // static fixture hook: finalizes what SetUpTestCase initialized
+    {
+      Kokkos::HostSpace::execution_space::finalize();
+    }
+};
+
+}  // namespace Test
+#endif
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6eec0683aeecaeae28a46a743b01164d7db7eb9c
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp
@@ -0,0 +1,168 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial , atomics )  // Asserts that TestAtomic::Loop returns true on Kokkos::Serial for each scalar type, with second argument 1, 2 and 3.
+{
+  const int loop_count = 1e6 ;  // double literal 1e6 converted to int, i.e. 1000000
+
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,1) ) );  // second argument presumably selects the test variant — TODO confirm in TestAtomic.hpp
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,1) ) );  // float, complex<double> and SuperScalar<4> pass 100 instead of loop_count
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,3) ) );
+}
+
+TEST_F( serial , atomic_operations )  // Sweeps the TestAtomicOperations integral and non-integral checks over several scalar types on Kokkos::Serial.
+{
+  const int start = 1; // Start at 1 rather than 0 to avoid zero for division.
+  const int end = 11;
+  for (int i = start; i < end; ++i)  // i = 1..10, so the second argument end-i runs from 10 down to 1
+  {
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 1 ) ) );  // last argument presumably selects the operation under test — TODO confirm in TestAtomicOperations.hpp
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 11 ) ) );  // NOTE(review): 10 is never passed for any integral type — confirm this is intentional
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 12 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 12 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 12 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 12) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 12 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 1 ) ) );  // non-integral types only pass 1 through 4 as the last argument
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 4 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 4 ) ) );
+  }
+
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b1c32cfaf5e1249e3de3e338bd2abf402525c95b
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp
@@ -0,0 +1,165 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial , md_range ) {  // Runs the 2D (100x100) and 3D (100x100x100) TestMDRange checks on Kokkos::Serial.
+  TestMDRange_2D< Kokkos::Serial >::test_for2(100,100);
+
+  TestMDRange_3D< Kokkos::Serial >::test_for3(100,100,100);
+}
+
+TEST_F( serial, policy_construction) {  // Instantiates the range-policy and team-policy construction tests for Kokkos::Serial.
+  TestRangePolicyConstruction< Kokkos::Serial >();
+  TestTeamPolicyConstruction< Kokkos::Serial >();
+}
+
+TEST_F( serial , range_tag )  // Runs TestRange test_for / test_reduce / test_scan on Kokkos::Serial with Static and Dynamic schedules.
+{
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(0);  // first group: argument 0 for both schedules
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_scan(0);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0);
+
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);  // second group: 1000 for Static, 1001 for Dynamic
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000);  // test_dynamic_policy is only called for the Dynamic schedule
+}
+
+
+//----------------------------------------------------------------------------
+
+TEST_F( serial , compiler_macros )  // Asserts that TestCompilerMacros::Test< Kokkos::Serial >() returns true.
+{
+  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Serial >() ) );
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( serial , memory_pool )  // Runs the three TestMemoryPool checks on Kokkos::Serial.
+{
+  bool val = TestMemoryPool::test_mempool< Kokkos::Serial >( 128, 128000000 );  // the only call whose return value is asserted here
+  ASSERT_TRUE( val );
+
+  TestMemoryPool::test_mempool2< Kokkos::Serial >( 64, 4, 1000000, 2000000 );  // no result is checked at this call site
+
+  TestMemoryPool::test_memory_exhaustion< Kokkos::Serial >();  // no result is checked at this call site
+}
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_ENABLE_TASKDAG )
+
+TEST_F( serial , task_fib )  // Calls TestTaskScheduler::TestFib< Kokkos::Serial >::run(i) for i = 0..24.
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestFib< Kokkos::Serial >::run(i);
+  }
+}
+
+TEST_F( serial , task_depend )  // Calls TestTaskScheduler::TestTaskDependence< Kokkos::Serial >::run(i) for i = 0..24.
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestTaskDependence< Kokkos::Serial >::run(i);
+  }
+}
+
+TEST_F( serial , task_team )  // Runs TestTaskScheduler::TestTaskTeam on Kokkos::Serial with argument 1000.
+{
+  TestTaskScheduler::TestTaskTeam< Kokkos::Serial >::run(1000);
+  // TODO: put back after testing: TestTaskScheduler::TestTaskTeamValue< Kokkos::Serial >::run(1000);
+}
+
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
+TEST_F( serial , cxx11 )  // Compiled only when KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL is defined.
+{
+  if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Serial >::value ) {  // the assertions run only when Serial is the default execution space
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(1) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(2) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(3) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(4) ) );
+  }
+}
+#endif  // KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
+
+TEST_F( serial, tile_layout )  // Calls TestTile::test on Kokkos::Serial for several template-integer pairs and argument pairs.
+{
+  TestTile::test< Kokkos::Serial , 1 , 1 >( 1 , 1 );  // template integers are presumably the tile dimensions and the arguments the view extents — TODO confirm in TestTile.hpp
+  TestTile::test< Kokkos::Serial , 1 , 1 >( 2 , 3 );
+  TestTile::test< Kokkos::Serial , 1 , 1 >( 9 , 10 );
+
+  TestTile::test< Kokkos::Serial , 2 , 2 >( 1 , 1 );
+  TestTile::test< Kokkos::Serial , 2 , 2 >( 2 , 3 );
+  TestTile::test< Kokkos::Serial , 2 , 2 >( 4 , 4 );
+  TestTile::test< Kokkos::Serial , 2 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::Serial , 2 , 4 >( 9 , 9 );  // the only two calls with unequal template integers
+  TestTile::test< Kokkos::Serial , 4 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::Serial , 4 , 4 >( 1 , 1 );
+  TestTile::test< Kokkos::Serial , 4 , 4 >( 4 , 4 );
+  TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 9 );
+  TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 11 );
+
+  TestTile::test< Kokkos::Serial , 8 , 8 >( 1 , 1 );
+  TestTile::test< Kokkos::Serial , 8 , 8 >( 4 , 4 );
+  TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 9 );
+  TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 11 );
+}
+
+
+
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..25b5ac6d16a8d101dd1e7d940007a107d1c814fc
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp
@@ -0,0 +1,122 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {  // Kokkos::Serial tests built on the TestReduce*, TestReducers, TestScan* and TestCXX11 helpers
+
+TEST_F( serial, long_reduce) {  // TestReduce with value type long; arguments 0 and 1000000 (presumably the work size -- confirm in TestReduce)
+  TestReduce< long ,   Kokkos::Serial >( 0 );
+  TestReduce< long ,   Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial, double_reduce) {  // same two arguments as long_reduce, value type double
+  TestReduce< double ,   Kokkos::Serial >( 0 );
+  TestReduce< double ,   Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial , reducers )  // picks a TestReducers entry point per scalar type
+{
+  TestReducers<int, Kokkos::Serial>::execute_integer();
+  TestReducers<size_t, Kokkos::Serial>::execute_integer();
+  TestReducers<double, Kokkos::Serial>::execute_float();
+  TestReducers<Kokkos::complex<double>, Kokkos::Serial>::execute_basic();  // complex<double> goes through execute_basic only
+}
+
+TEST_F( serial, long_reduce_dynamic ) {  // TestReduceDynamic with value type long, arguments 0 and 1000000
+  TestReduceDynamic< long ,   Kokkos::Serial >( 0 );
+  TestReduceDynamic< long ,   Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial, double_reduce_dynamic ) {  // TestReduceDynamic with value type double, arguments 0 and 1000000
+  TestReduceDynamic< double ,   Kokkos::Serial >( 0 );
+  TestReduceDynamic< double ,   Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial, long_reduce_dynamic_view ) {  // TestReduceDynamicView with value type long; this file has no double counterpart
+  TestReduceDynamicView< long ,   Kokkos::Serial >( 0 );
+  TestReduceDynamicView< long ,   Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial , scan )  // one static test_range call, then TestScan with arguments 0, 10 and 10000
+{
+  TestScan< Kokkos::Serial >::test_range( 1 , 1000 );
+  TestScan< Kokkos::Serial >( 0 );
+  TestScan< Kokkos::Serial >( 10 );
+  TestScan< Kokkos::Serial >( 10000 );
+}
+
+TEST_F( serial  , team_scan )  // each argument (0, 10, 10000) is used with both Schedule<Static> and Schedule<Dynamic>
+{
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10 );
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10000 );
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
+}
+
+TEST_F( serial , team_long_reduce) {  // TestReduceTeam for long; arguments 0, 3 and 100000 under both schedules
+  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< long ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+
+TEST_F( serial , team_double_reduce) {  // TestReduceTeam for double; same arguments and schedules as team_long_reduce
+  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< double ,   Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+
+TEST_F( serial , reduction_deduction )  // forwards to TestCXX11::test_reduction_deduction for Kokkos::Serial
+{
+  TestCXX11::test_reduction_deduction< Kokkos::Serial >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bc838ccde4b36cf964d0da97500fdbd921a85aa0
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp
@@ -0,0 +1,92 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {  // each TEST_F below forwards to exactly one TestViewSubview helper instantiated for Kokkos::Serial
+
+TEST_F( serial, view_subview_auto_1d_left ) {  // test_auto_1d with Kokkos::LayoutLeft
+  TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_auto_1d_right ) {  // test_auto_1d with Kokkos::LayoutRight
+  TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_auto_1d_stride ) {  // test_auto_1d with Kokkos::LayoutStride
+  TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_assign_strided ) {  // forwards to test_1d_strided_assignment
+  TestViewSubview::test_1d_strided_assignment< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_left_0 ) {  // test_left_0 through test_left_3 follow, one TEST_F each
+  TestViewSubview::test_left_0< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_left_1 ) {
+  TestViewSubview::test_left_1< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_left_2 ) {
+  TestViewSubview::test_left_2< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_left_3 ) {
+  TestViewSubview::test_left_3< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_right_0 ) {  // right-hand variants: only _0, _1 and _3 are invoked in this file (no _2)
+  TestViewSubview::test_right_0< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_right_1 ) {
+  TestViewSubview::test_right_1< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_right_3 ) {
+  TestViewSubview::test_right_3< Kokkos::Serial >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e6a5b56d3ed48ac2301e56b944e4924dcb79451e
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp
@@ -0,0 +1,60 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {  // same-layout subview tests for Kokkos::Serial, each run with three memory-trait choices
+
+TEST_F( serial, view_subview_layoutleft_to_layoutleft) {  // no explicit traits, then MemoryTraits<Atomic>, then MemoryTraits<RandomAccess>
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial >();
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+TEST_F( serial, view_subview_layoutright_to_layoutright) {  // same three trait choices for the layoutright helper
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial >();
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0b7a0d3bfa6fa514195a4fd6241fc262f0ad884d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial, view_subview_1d_assign ) {  // test_1d_assign on Kokkos::Serial with no explicit memory traits
+  TestViewSubview::test_1d_assign< Kokkos::Serial >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8ca7285c1f8331cb6992411d6b35d7bc054945a3
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial, view_subview_1d_assign_atomic ) {  // test_1d_assign on Kokkos::Serial with MemoryTraits<Atomic>
+  TestViewSubview::test_1d_assign< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1d156c741524315d2fb66fdc5e852329d846d3ae
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial, view_subview_1d_assign_randomaccess ) {  // test_1d_assign on Kokkos::Serial with MemoryTraits<RandomAccess>
+  TestViewSubview::test_1d_assign< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ebf0e5c99155afe17dea3807981d712e1d67c601
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial, view_subview_2d_from_3d ) {  // test_2d_subview_3d on Kokkos::Serial with no explicit memory traits
+  TestViewSubview::test_2d_subview_3d< Kokkos::Serial >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..74acb92f1b9e632a980b7d0141a54200aebbfd15
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial, view_subview_2d_from_3d_atomic ) {  // test_2d_subview_3d on Kokkos::Serial with MemoryTraits<Atomic>
+  TestViewSubview::test_2d_subview_3d< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8075d46e0fe15c4c15a47e80f6172d4990fd6ce5
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial, view_subview_2d_from_3d_randomaccess ) {  // test_2d_subview_3d on Kokkos::Serial with MemoryTraits<RandomAccess>
+  TestViewSubview::test_2d_subview_3d< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ce8222643a5d3a183fad578013945a67efd6847
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial, view_subview_3d_from_5d_left ) {  // test_3d_subview_5d_left on Kokkos::Serial with no explicit memory traits
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c8a5c8f33fdc70a2408aade42f21b3c451753b4c
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+
+TEST_F( serial, view_subview_3d_from_5d_left_atomic ) {  // test_3d_subview_5d_left on Kokkos::Serial with MemoryTraits<Atomic>
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b66f15f17da1b7f0bcb24459678965dacee04f9b
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+// Instantiates TestViewSubview::test_3d_subview_5d_left for Kokkos::Serial with MemoryTraits<RandomAccess>.
+TEST_F( serial, view_subview_3d_from_5d_left_randomaccess ) {
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5e5e3cf3d1af0f0755ab8fa3f8be9f846ff554e9
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+// Instantiates TestViewSubview::test_3d_subview_5d_right for Kokkos::Serial with no explicit memory trait.
+TEST_F( serial, view_subview_3d_from_5d_right ) {
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..55a353bcafef5e852ec33c80d9084f7c2236efcc
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+// Instantiates TestViewSubview::test_3d_subview_5d_right for Kokkos::Serial with MemoryTraits<Atomic>.
+TEST_F( serial, view_subview_3d_from_5d_right_atomic ) {
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a168e1e232ff5f71cce593be776496cbd7dd6c25
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+// Instantiates TestViewSubview::test_3d_subview_5d_right for Kokkos::Serial with MemoryTraits<RandomAccess>.
+TEST_F( serial, view_subview_3d_from_5d_right_randomaccess ) {
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a489b0fcb585aa0e12310f09a0701188b8814045
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp
@@ -0,0 +1,12 @@
+#include<serial/TestSerial_SubView_c01.cpp>
+#include<serial/TestSerial_SubView_c02.cpp>
+#include<serial/TestSerial_SubView_c03.cpp>
+#include<serial/TestSerial_SubView_c04.cpp>
+#include<serial/TestSerial_SubView_c05.cpp>
+#include<serial/TestSerial_SubView_c06.cpp>
+#include<serial/TestSerial_SubView_c07.cpp>
+#include<serial/TestSerial_SubView_c08.cpp>
+#include<serial/TestSerial_SubView_c09.cpp>
+#include<serial/TestSerial_SubView_c10.cpp>
+#include<serial/TestSerial_SubView_c11.cpp>
+#include<serial/TestSerial_SubView_c12.cpp>
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3318e5f2457a9f4e79a0dbdd2a5f44571b895be1
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp
@@ -0,0 +1,117 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+// team_tag: runs TestTeamPolicy test_for and test_reduce under both the Static and Dynamic schedules.
+TEST_F( serial , team_tag )
+{
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(0);
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0);
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0);
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0);
+  // Same four calls again with argument 1000 in place of 0.
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000);
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000);
+}
+// team_shared_request: constructs TestSharedTeam once per schedule (Static, Dynamic).
+TEST_F( serial , team_shared_request) {
+  TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >();
+  TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+// team_scratch_request: constructs TestScratchTeam once per schedule (Static, Dynamic).
+TEST_F( serial, team_scratch_request) {
+  TestScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >();
+  TestScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+// team_lambda_shared_request: compiled only when KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA is defined.
+#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
+TEST_F( serial , team_lambda_shared_request) {
+  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >();
+  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+#endif // KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
+// shmem_size: constructs TestShmemSize for Kokkos::Serial.
+TEST_F( serial, shmem_size) {
+  TestShmemSize< Kokkos::Serial >();
+}
+// multi_level_scratch: constructs TestMultiLevelScratchTeam once per schedule (Static, Dynamic).
+TEST_F( serial, multi_level_scratch) {
+  TestMultiLevelScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >();
+  TestMultiLevelScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+// team_vector: TestTeamVector::Test must return true for each argument 0 through 10; ASSERT_TRUE aborts the test at the first failure.
+TEST_F( serial , team_vector )
+{
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(0) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(1) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(2) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(3) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(4) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(5) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(6) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(7) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(8) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(9) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(10) ) );
+}
+// Define SKIP_TEST when KOKKOS_COMPILER_GNU == 472 (presumably GCC 4.7.2 -- confirm in Kokkos_Macros.hpp).
+#ifdef KOKKOS_COMPILER_GNU
+#if ( KOKKOS_COMPILER_GNU == 472 )
+#define SKIP_TEST
+#endif
+#endif // KOKKOS_COMPILER_GNU
+// NOTE(review): SKIP_TEST is never undefined in this file, so it stays defined for any code that follows in the same translation unit.
+#ifndef SKIP_TEST
+TEST_F( serial, triple_nested_parallelism )
+{
+  TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 32 , 32 );
+  TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 32 , 16 );
+  TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 16 , 16 );
+}
+#endif // !SKIP_TEST
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c655fe770f26fd8d6b239251c5d6301140faa09
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp
@@ -0,0 +1,53 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+// Runs test_view_mapping and then test_view_mapping_operator, both instantiated for Kokkos::Serial.
+TEST_F( serial , impl_view_mapping_a ) {
+  test_view_mapping< Kokkos::Serial >();
+  test_view_mapping_operator< Kokkos::Serial >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4947f2eaaef607b04d680a7c9c64ae6f2d8e6087
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp
@@ -0,0 +1,121 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <serial/TestSerial.hpp>
+
+namespace Test {
+// impl_shared_alloc: runs test_shared_alloc for HostSpace memory with the Serial execution space.
+TEST_F( serial , impl_shared_alloc ) {
+  test_shared_alloc< Kokkos::HostSpace , Kokkos::Serial >();
+}
+// impl_view_mapping_b: runs test_view_mapping_subview, then TestViewMappingAtomic::run.
+TEST_F( serial , impl_view_mapping_b ) {
+  test_view_mapping_subview< Kokkos::Serial >();
+  TestViewMappingAtomic< Kokkos::Serial >::run();
+}
+// view_api: constructs TestViewAPI for element type double on Kokkos::Serial.
+TEST_F( serial, view_api) {
+  TestViewAPI< double , Kokkos::Serial >();
+}
+// view_nested_view: forwards to the free function template ::Test::view_nested_view.
+TEST_F( serial , view_nested_view )
+{
+  ::Test::view_nested_view< Kokkos::Serial >();
+}
+// view_remap: fills a rank-4 int view (LayoutLeft) with 1,2,3,... walking i0 fastest,
+// deep_copies it into a rank-4 double view (LayoutRight) with the same extents, and checks
+// that every element read back through the same indices reproduces the original sequence.
+TEST_F( serial , view_remap )
+{
+  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
+
+  typedef Kokkos::View< double*[N1][N2][N3] ,
+                             Kokkos::LayoutRight ,
+                             Kokkos::Serial > output_type ;
+
+  typedef Kokkos::View< int**[N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::Serial > input_type ;
+  // diff_type fixes its second extent to N0 (3) where input uses N1 (2): the incompatible shape mentioned below.
+  typedef Kokkos::View< int*[N0][N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::Serial > diff_type ;
+  // Only the runtime ('*') extents are passed to the constructors; the bracketed extents come from the types.
+  output_type output( "output" , N0 );
+  input_type  input ( "input" , N0 , N1 );
+  diff_type   diff  ( "diff" , N0 );
+  // Fill input with 1 .. N0*N1*N2*N3, with i0 as the innermost loop.
+  int value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    input(i0,i1,i2,i3) = ++value ;
+  }}}}
+  // NOTE(review): diff is only referenced by the disabled deep_copy on the next line.
+  // Kokkos::deep_copy( diff , input ); // throw with incompatible shape
+  Kokkos::deep_copy( output , input );
+  // Replay the same traversal and compare each copied value (cast back to int) with the expected counter.
+  value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    ++value ;
+    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
+  }}}}
+}
+
+//----------------------------------------------------------------------------
+// view_aggregate: constructs TestViewAggregate for Kokkos::Serial.
+TEST_F( serial , view_aggregate )
+{
+  TestViewAggregate< Kokkos::Serial >();
+}
+// template_meta_functions: constructs TestTemplateMetaFunctions for int on Kokkos::Serial.
+TEST_F( serial , template_meta_functions )
+{
+  TestTemplateMetaFunctions<int, Kokkos::Serial >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads.hpp b/lib/kokkos/core/unit_test/threads/TestThreads.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bb9f36581aa753f15c789d2a7592031ed70caa57
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads.hpp
@@ -0,0 +1,114 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_TEST_THREADSHPP
+#define KOKKOS_TEST_THREADSHPP
+#include <gtest/gtest.h>
+
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_LAMBDA
+#undef KOKKOS_LAMBDA
+#endif
+#define KOKKOS_LAMBDA [=]
+
+#include <Kokkos_Core.hpp>
+
+#include <TestTile.hpp>
+
+//----------------------------------------------------------------------------
+
+#include <TestSharedAlloc.hpp>
+#include <TestViewMapping.hpp>
+
+
+#include <TestViewAPI.hpp>
+#include <TestViewOfClass.hpp>
+#include <TestViewSubview.hpp>
+#include <TestAtomic.hpp>
+#include <TestAtomicOperations.hpp>
+#include <TestRange.hpp>
+#include <TestTeam.hpp>
+#include <TestReduce.hpp>
+#include <TestScan.hpp>
+#include <TestAggregate.hpp>
+#include <TestCompilerMacros.hpp>
+#include <TestTaskScheduler.hpp>
+#include <TestMemoryPool.hpp>
+
+
+#include <TestCXX11.hpp>
+#include <TestCXX11Deduction.hpp>
+#include <TestTeamVector.hpp>
+#include <TestTemplateMetaFunctions.hpp>
+
+#include <TestPolicyConstruction.hpp>
+
+#include <TestMDRange.hpp>
+
+namespace Test {
+// gtest fixture for the Kokkos::Threads tests: initializes the backend in SetUpTestCase and finalizes it in TearDownTestCase.
+class threads : public ::testing::Test {
+protected:
+  static void SetUpTestCase()
+  {
+    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
+    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
+    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
+    // NOTE(review): std::max and std::cout are used below, but this header includes neither <algorithm> nor <iostream> directly; presumably they arrive transitively -- confirm.
+    unsigned threads_count = 0 ;
+    // Clamp to at least 1 for the NUMA factor and at least 2 for the per-NUMA factor when the hwloc queries return smaller values.
+    threads_count = std::max( 1u , numa_count )
+                  * std::max( 2u , cores_per_numa * threads_per_core );
+    // Pass the computed count to Kokkos::Threads::initialize, then print the detailed configuration to std::cout.
+    Kokkos::Threads::initialize( threads_count );
+    Kokkos::Threads::print_configuration( std::cout , true /* detailed */ );
+  }
+
+  static void TearDownTestCase()
+  {
+    Kokkos::Threads::finalize();
+  }
+};
+
+
+} // namespace Test
+#endif
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8ce32fc3385b28f1be58aa82606a59bffc192bc1
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp
@@ -0,0 +1,168 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads , atomics )
+{ // Asserts TestAtomic::Loop< T , Kokkos::Threads >( count , k ) for k = 1, 2, 3 (k presumably selects the variant; see TestAtomic.hpp).
+  const int loop_count = 1e4 ; // 10000; the float, Kokkos::complex<double> and SuperScalar<4> cases below pass a literal 100 instead
+
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Threads>(100,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Threads>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Threads>(100,3) ) );
+}
+
+TEST_F( threads , atomic_operations )
+{ // For i = 1..10 every helper is called with ( start , end-i , id ), so the second value runs from 10 down to 1.
+  const int start = 1; //Avoid zero for division
+  const int end = 11;
+  for (int i = start; i < end; ++i)
+  { // Integral types use ids 1-9, 11 and 12 (id 10 is not exercised here); double and float use ids 1-4.
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 12 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 12 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 12 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 12 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 9 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 11 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 12 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 4 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 4 ) ) );
+  }
+
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d9f17cc88a85105e41c887be2531261a58f8f436
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp
@@ -0,0 +1,189 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+// Intentionally empty: the test body does nothing beyond what the `threads` fixture itself performs.
+TEST_F( threads , init )
+{}
+
+TEST_F( threads , md_range ) { // 2-D and 3-D MDRange for-tests, every size argument equal to 100
+  typedef Kokkos::Threads Space ; // execution space under test
+  TestMDRange_2D< Space >::test_for2( 100 , 100 );
+  TestMDRange_3D< Space >::test_for3( 100 , 100 , 100 );
+}
+
+TEST_F( threads, policy_construction) { // invokes the RangePolicy and TeamPolicy construction tests for Kokkos::Threads
+  TestRangePolicyConstruction< Kokkos::Threads >();
+  TestTeamPolicyConstruction< Kokkos::Threads >();
+}
+
+TEST_F( threads , range_tag ) {
+  typedef TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >  StaticRange ;
+  typedef TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> > DynamicRange ;
+  // Argument 0 with both schedules.
+  StaticRange::test_for( 0 );
+  StaticRange::test_reduce( 0 );
+  StaticRange::test_scan( 0 );
+  DynamicRange::test_for( 0 );
+  DynamicRange::test_reduce( 0 );
+  DynamicRange::test_scan( 0 );
+  DynamicRange::test_dynamic_policy( 0 );
+  // Small arguments: 2 for the static schedule, 3 for the dynamic one.
+  StaticRange::test_for( 2 );
+  StaticRange::test_reduce( 2 );
+  StaticRange::test_scan( 2 );
+  DynamicRange::test_for( 3 );
+  DynamicRange::test_reduce( 3 );
+  DynamicRange::test_scan( 3 );
+  DynamicRange::test_dynamic_policy( 3 );
+  // Larger arguments: 1000 (static), 1001 (dynamic); test_dynamic_policy is given 1000.
+  StaticRange::test_for( 1000 );
+  StaticRange::test_reduce( 1000 );
+  StaticRange::test_scan( 1000 );
+  DynamicRange::test_for( 1001 );
+  DynamicRange::test_reduce( 1001 );
+  DynamicRange::test_scan( 1001 );
+  DynamicRange::test_dynamic_policy( 1000 );
+}
+
+
+//----------------------------------------------------------------------------
+
+TEST_F( threads , compiler_macros )
+{ // passes when TestCompilerMacros::Test< Kokkos::Threads >() evaluates to true
+  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Threads >() ) );
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( threads , memory_pool ) {
+  typedef Kokkos::Threads Space ; // execution space under test
+
+  const bool val = TestMemoryPool::test_mempool< Space >( 128 , 128000000 );
+  ASSERT_TRUE( val ); // only this first check is asserted; the two calls below report through their own means
+
+  TestMemoryPool::test_mempool2< Space >( 64 , 4 , 1000000 , 2000000 );
+  TestMemoryPool::test_memory_exhaustion< Space >();
+}
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_ENABLE_TASKDAG )
+/*
+TEST_F( threads , task_fib )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestFib< Kokkos::Threads >::run(i);
+  }
+}
+
+TEST_F( threads , task_depend )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestTaskDependence< Kokkos::Threads >::run(i);
+  }
+}
+
+TEST_F( threads , task_team )
+{
+  TestTaskScheduler::TestTaskTeam< Kokkos::Threads >::run(1000);
+  //TestTaskScheduler::TestTaskTeamValue< Kokkos::Threads >::run(1000); //put back after testing
+}
+*/
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
+TEST_F( threads , cxx11 )
+{
+  // Nothing is checked unless Kokkos::Threads is the default execution space.
+  if ( ! std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Threads >::value ) return ;
+  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(1) ) );
+  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(2) ) );
+  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(3) ) );
+  ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(4) ) );
+}
+#endif
+
+TEST_F( threads , tile_layout ) { // TestTile::test< Space , N0 , N1 >( a , b ) for integer pairs (1,1) through (8,8)
+  typedef Kokkos::Threads Space ;
+  TestTile::test< Space , 1 , 1 >( 1 , 1 );
+  TestTile::test< Space , 1 , 1 >( 2 , 3 );
+  TestTile::test< Space , 1 , 1 >( 9 , 10 );
+
+  TestTile::test< Space , 2 , 2 >( 1 , 1 );
+  TestTile::test< Space , 2 , 2 >( 2 , 3 );
+  TestTile::test< Space , 2 , 2 >( 4 , 4 );
+  TestTile::test< Space , 2 , 2 >( 9 , 9 );
+
+  TestTile::test< Space , 2 , 4 >( 9 , 9 );
+  TestTile::test< Space , 4 , 2 >( 9 , 9 );
+
+  TestTile::test< Space , 4 , 4 >( 1 , 1 );
+  TestTile::test< Space , 4 , 4 >( 4 , 4 );
+  TestTile::test< Space , 4 , 4 >( 9 , 9 );
+  TestTile::test< Space , 4 , 4 >( 9 , 11 );
+
+  TestTile::test< Space , 8 , 8 >( 1 , 1 );
+  TestTile::test< Space , 8 , 8 >( 4 , 4 );
+  TestTile::test< Space , 8 , 8 >( 9 , 9 );
+  TestTile::test< Space , 8 , 8 >( 9 , 11 );
+}
+
+
+TEST_F( threads , dispatch )
+{ // Issues repeat * repeat parallel_for calls with an empty body, using RangePolicy( 0 , len ) for len = 0 .. repeat-1.
+  const int repeat = 100 ;
+  for ( int pass = 0 ; pass < repeat ; ++pass ) {
+    for ( int len = 0 ; len < repeat ; ++len ) {
+      Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >( 0 , len ) , KOKKOS_LAMBDA( int ) {} );
+    }
+  }
+}
+
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a637d1e3ab654b402e49b7d3aec582e425d2592a
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp
@@ -0,0 +1,138 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, long_reduce) { // TestReduce with value type long on Kokkos::Threads, arguments 0 and 1000000
+  TestReduce< long ,   Kokkos::Threads >( 0 );
+  TestReduce< long ,   Kokkos::Threads >( 1000000 );
+}
+
+TEST_F( threads, double_reduce) { // TestReduce with value type double on Kokkos::Threads, arguments 0 and 1000000
+  TestReduce< double ,   Kokkos::Threads >( 0 );
+  TestReduce< double ,   Kokkos::Threads >( 1000000 );
+}
+
+TEST_F( threads , reducers ) { // integer, floating-point and basic reducer suites on Kokkos::Threads
+  typedef Kokkos::Threads Space ; // execution space under test
+  TestReducers< int , Space >::execute_integer();
+  TestReducers< size_t , Space >::execute_integer();
+  TestReducers< double , Space >::execute_float();
+  TestReducers< Kokkos::complex<double> , Space >::execute_basic();
+}
+
+TEST_F( threads, long_reduce_dynamic ) { // TestReduceDynamic with value type long, arguments 0 and 1000000
+  TestReduceDynamic< long ,   Kokkos::Threads >( 0 );
+  TestReduceDynamic< long ,   Kokkos::Threads >( 1000000 );
+}
+
+TEST_F( threads, double_reduce_dynamic ) { // TestReduceDynamic with value type double, arguments 0 and 1000000
+  TestReduceDynamic< double ,   Kokkos::Threads >( 0 );
+  TestReduceDynamic< double ,   Kokkos::Threads >( 1000000 );
+}
+
+TEST_F( threads, long_reduce_dynamic_view ) { // TestReduceDynamicView with value type long, arguments 0 and 1000000
+  TestReduceDynamicView< long ,   Kokkos::Threads >( 0 );
+  TestReduceDynamicView< long ,   Kokkos::Threads >( 1000000 );
+}
+
+TEST_F( threads , scan ) {
+  typedef TestScan< Kokkos::Threads > Scan ; // static test_range(), then temporaries built from 0, 100000 and 10000000
+  Scan::test_range( 1 , 1000 );
+  Scan( 0 );
+  Scan( 100000 );
+  Scan( 10000000 );
+  Kokkos::Threads::fence();
+}
+
+#if 0 // disabled: everything up to the matching #endif is skipped by the preprocessor
+TEST_F( threads , scan_small )
+{
+  typedef TestScan< Kokkos::Threads , Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor ;
+  for ( int i = 0 ; i < 1000 ; ++i ) {
+    TestScanFunctor( 10 );
+    TestScanFunctor( 10000 );
+  }
+  TestScanFunctor( 1000000 );
+  TestScanFunctor( 10000000 );
+
+  Kokkos::Threads::fence();
+}
+#endif
+
+TEST_F( threads  , team_scan )
+{ // TestScanTeam on Kokkos::Threads with arguments 0, 10 and 10000, each under the Static and the Dynamic schedule
+  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10 );
+  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
+  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10000 );
+  TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
+}
+
+TEST_F( threads , team_long_reduce) { // TestReduceTeam< long > with arguments 0, 3 and 100000, each under the Static and the Dynamic schedule
+  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< long ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+
+TEST_F( threads , team_double_reduce) { // TestReduceTeam< double > with arguments 0, 3 and 100000, each under the Static and the Dynamic schedule
+  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< double ,   Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+
+TEST_F( threads , reduction_deduction ) { // runs TestCXX11::test_reduction_deduction on the Threads back-end
+  typedef Kokkos::Threads Space ; // execution space under test
+  TestCXX11::test_reduction_deduction< Space >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2df9e19deb0130359d81b8c3cc001bb85ee7cb2f
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp
@@ -0,0 +1,92 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+// Runs TestViewSubview::test_auto_1d with Kokkos::LayoutLeft on Kokkos::Threads.
+TEST_F( threads , view_subview_auto_1d_left )
+{ TestViewSubview::test_auto_1d< Kokkos::LayoutLeft , Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_auto_1d with Kokkos::LayoutRight on Kokkos::Threads.
+TEST_F( threads , view_subview_auto_1d_right )
+{ TestViewSubview::test_auto_1d< Kokkos::LayoutRight , Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_auto_1d with Kokkos::LayoutStride on Kokkos::Threads.
+TEST_F( threads , view_subview_auto_1d_stride )
+{ TestViewSubview::test_auto_1d< Kokkos::LayoutStride , Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_1d_strided_assignment on Kokkos::Threads.
+TEST_F( threads , view_subview_assign_strided )
+{ TestViewSubview::test_1d_strided_assignment< Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_left_0 on Kokkos::Threads.
+TEST_F( threads , view_subview_left_0 )
+{ TestViewSubview::test_left_0< Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_left_1 on Kokkos::Threads.
+TEST_F( threads , view_subview_left_1 )
+{ TestViewSubview::test_left_1< Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_left_2 on Kokkos::Threads.
+TEST_F( threads , view_subview_left_2 )
+{ TestViewSubview::test_left_2< Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_left_3 on Kokkos::Threads.
+TEST_F( threads , view_subview_left_3 )
+{ TestViewSubview::test_left_3< Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_right_0 on Kokkos::Threads.
+TEST_F( threads , view_subview_right_0 )
+{ TestViewSubview::test_right_0< Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_right_1 on Kokkos::Threads.
+TEST_F( threads , view_subview_right_1 )
+{ TestViewSubview::test_right_1< Kokkos::Threads >(); }
+
+// Runs TestViewSubview::test_right_3 on Kokkos::Threads.
+TEST_F( threads , view_subview_right_3 )
+{ TestViewSubview::test_right_3< Kokkos::Threads >(); }
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d57dbe97c0d38aaa6a2e48816eb9872a8585afb7
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp
@@ -0,0 +1,60 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_layoutleft_to_layoutleft) { // default memory traits, then MemoryTraits<Atomic>, then MemoryTraits<RandomAccess>
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads >();
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+TEST_F( threads, view_subview_layoutright_to_layoutright) { // default memory traits, then MemoryTraits<Atomic>, then MemoryTraits<RandomAccess>
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads >();
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..67d998c0e86488df0023cc0138ffe022cdc52d94
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_1d_assign ) { // test_1d_assign for Kokkos::Threads, no MemoryTraits argument
+  TestViewSubview::test_1d_assign< Kokkos::Threads >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e340240c48d6d28c9bc4c79b777a3e1a4a8c4ddc
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_1d_assign_atomic ) { // test_1d_assign for Kokkos::Threads with MemoryTraits<Atomic>
+  TestViewSubview::test_1d_assign< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ad27fa0fa6cee9db3eb63c581a175eee0cdd6e4e
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_1d_assign_randomaccess ) { // test_1d_assign for Kokkos::Threads with MemoryTraits<RandomAccess>
+  TestViewSubview::test_1d_assign< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6fca47cc4ce41b56155fac8ce1d4b158d5e99c82
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_2d_from_3d ) { // test_2d_subview_3d for Kokkos::Threads, no MemoryTraits argument
+  TestViewSubview::test_2d_subview_3d< Kokkos::Threads >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c7dfca941582dee3d667f60152854ea30b393548
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_2d_from_3d_atomic ) { // test_2d_subview_3d for Kokkos::Threads with MemoryTraits<Atomic>
+  TestViewSubview::test_2d_subview_3d< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..38e8394918614fdb528e9111d7fc1f54c7ff4d83
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_2d_from_3d_randomaccess ) { // test_2d_subview_3d for Kokkos::Threads with MemoryTraits<RandomAccess>
+  TestViewSubview::test_2d_subview_3d< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1f01fe6b5e6104416bb1f2f680cafeab48cac1ad
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_3d_from_5d_left ) { // test_3d_subview_5d_left for Kokkos::Threads, no MemoryTraits argument
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e9a1ccbe30edcf7f512a5c20462df83cf52c3ac4
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_3d_from_5d_left_atomic ) { // test_3d_subview_5d_left for Kokkos::Threads with MemoryTraits<Atomic>
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c8b6c8743dd25a97db5f00e5bc7157c9f040c5d9
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_3d_from_5d_left_randomaccess ) { // test_3d_subview_5d_left for Kokkos::Threads with MemoryTraits<RandomAccess>
+  TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7cef6fa07be88859c063470857d775964c74f2fa
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_3d_from_5d_right ) { // test_3d_subview_5d_right for Kokkos::Threads, no MemoryTraits argument
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d67bf3157e337fef0af36dbba934f8bc22d74d0c
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_3d_from_5d_right_atomic ) { // test_3d_subview_5d_right for Kokkos::Threads with MemoryTraits<Atomic>
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e8a2c825cf3a9474d149d81a225cbadb16338cd7
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads, view_subview_3d_from_5d_right_randomaccess ) { // test_3d_subview_5d_right for Kokkos::Threads with MemoryTraits<RandomAccess>
+  TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..03f31b78c0bca12ef085d67b59a8f5ea45a5d614
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp
@@ -0,0 +1,122 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads , team_tag )
+{
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(0);
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0);
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0);
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0);
+
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2);
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2);
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2);
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2);
+
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000);
+  TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000);
+}
+
+TEST_F( threads , team_shared_request) {
+  TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >();
+  TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+
+TEST_F( threads, team_scratch_request) {
+  TestScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >();
+  TestScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+
+#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
+TEST_F( threads , team_lambda_shared_request) {
+  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >();
+  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+#endif // KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
+
+TEST_F( threads, shmem_size) {
+  TestShmemSize< Kokkos::Threads >();
+}
+
+TEST_F( threads, multi_level_scratch) {
+  TestMultiLevelScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >();
+  TestMultiLevelScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+
+TEST_F( threads , team_vector )
+{
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(0) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(1) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(2) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(3) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(4) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(5) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(6) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(7) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(8) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(9) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(10) ) );
+}
+
+#ifdef KOKKOS_COMPILER_GNU
+#if ( KOKKOS_COMPILER_GNU == 472 )
+#define SKIP_TEST
+#endif // KOKKOS_COMPILER_GNU == 472
+#endif // KOKKOS_COMPILER_GNU
+
+#ifndef SKIP_TEST
+TEST_F( threads, triple_nested_parallelism )
+{
+  TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 32 , 32 );
+  TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 32 , 16 );
+  TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 16 , 16 );
+}
+#endif // !SKIP_TEST
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..46a576b027fb2149302239ba31d6e53bd001e3ce
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp
@@ -0,0 +1,53 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads , impl_view_mapping_a ) {
+  test_view_mapping< Kokkos::Threads >();
+  test_view_mapping_operator< Kokkos::Threads >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b5d6ac843d8177149d53fe1cb52528c6ef760f3d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp
@@ -0,0 +1,121 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <threads/TestThreads.hpp>
+
+namespace Test {
+
+TEST_F( threads , impl_shared_alloc ) {
+  test_shared_alloc< Kokkos::HostSpace , Kokkos::Threads >();
+}
+
+TEST_F( threads , impl_view_mapping_b ) {
+  test_view_mapping_subview< Kokkos::Threads >();
+  TestViewMappingAtomic< Kokkos::Threads >::run();
+}
+
+TEST_F( threads, view_api) {
+  TestViewAPI< double , Kokkos::Threads >();
+}
+
+TEST_F( threads , view_nested_view )
+{
+  ::Test::view_nested_view< Kokkos::Threads >();
+}
+
+
+
+TEST_F( threads , view_remap )
+{
+  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
+
+  typedef Kokkos::View< double*[N1][N2][N3] ,
+                             Kokkos::LayoutRight ,
+                             Kokkos::Threads > output_type ;
+
+  typedef Kokkos::View< int**[N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::Threads > input_type ;
+
+  typedef Kokkos::View< int*[N0][N2][N3] ,
+                             Kokkos::LayoutLeft ,
+                             Kokkos::Threads > diff_type ;
+
+  output_type output( "output" , N0 );
+  input_type  input ( "input" , N0 , N1 );
+  diff_type   diff  ( "diff" , N0 ); // only referenced by the commented-out deep_copy below
+
+  int value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    input(i0,i1,i2,i3) = ++value ;
+  }}}}
+
+  // Kokkos::deep_copy( diff , input ); // throw with incompatible shape
+  Kokkos::deep_copy( output , input );
+
+  value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    ++value ;
+    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
+  }}}}
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( threads , view_aggregate )
+{
+  TestViewAggregate< Kokkos::Threads >();
+}
+
+TEST_F( threads , template_meta_functions )
+{
+  TestTemplateMetaFunctions<int, Kokkos::Threads >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/doc/README b/lib/kokkos/doc/README
deleted file mode 100644
index 31e75f365c21a116a1fb736097f4f524e8d1e021..0000000000000000000000000000000000000000
--- a/lib/kokkos/doc/README
+++ /dev/null
@@ -1,32 +0,0 @@
-Kokkos uses the Doxygen tool for providing three documentation
-sources:
-- man pages
-- Latex User Guide
-- HTML Online User Guide.
-
-Man Pages
-
-Man pages are available for all files and functions in the directory
-TRILINOS_HOME/doc/kokkos/man, where TRILINOS_HOME is the location of your
-copy of Trilinos.  To use these pages with the Unix man utility, add
-the directory to your man path as follows:
-
-setenv MANPATH `echo $MANPATH`:TRILINOS_HOME/doc/kokkos/man
-
-
-LaTeX User Guide
-
-A postscript version of this guide is in
-TRILINOS_HOME/doc/kokkos/latex/user_guide.ps.  The LaTeX source is in the
-directory TRILINOS_HOME/doc/kokkos/latex.
-
-HTML Online User Guide
-
-The online guide is initiated by pointing your browser to
-TRILINOS_HOME/doc/kokkos/html/index.html
-
-Any question, comments or suggestions are welcome.  Please send to
-Mike Heroux at 
-
-320-845-7695
-maherou@sandia.gov
diff --git a/lib/kokkos/doc/design_notes_space_instances.md b/lib/kokkos/doc/design_notes_space_instances.md
new file mode 100644
index 0000000000000000000000000000000000000000..487fa25bcb32875ed3ba90821aba006a13cd506e
--- /dev/null
+++ b/lib/kokkos/doc/design_notes_space_instances.md
@@ -0,0 +1,166 @@
+# Design Notes for Execution and Memory Space Instances
+
+
+## Execution Spaces
+
+  *  Work is *dispatched* to an execution space instance
+
+
+
+## Host Associated Execution Space Instances
+
+Vocabulary and examples assuming C++11 Threads Support Library
+
+  *  A host-side *control* thread dispatches work to an instance
+
+  * `this_thread` is the control thread
+
+  * `main` is the initial control thread
+
+  *  An execution space instance is a pool of threads
+
+  *  All instances are disjoint thread pools
+
+  *  Exactly one control thread is associated with
+     an instance and only that control thread may
+     dispatch work to that instance
+
+  *  A control thread may be a member of an instance,
+     if so then it is also the control thread associated
+     with that instance
+
+  *  The pool of threads associated with an instance is not mutable
+
+  *  The pool of threads associated with an instance may be masked
+
+    -  Allows work to be dispatched to a subset of the pool
+
+    -  Example: only one hyperthread per core of the instance
+
+    -  When a mask is applied to an instance that mask
+       remains until cleared or another mask is applied
+
+    -  Masking is portable by defining it as using a fraction
+       of the available resources (threads)
+
+  *  Instances are shared (reference counted) objects,
+     just like `Kokkos::View`
+
+```
+struct StdThread {
+  void mask( float fraction );
+  void unmask() { mask( 1.0 ); }
+};
+```
+
+
+
+### Requesting an Execution Space Instance
+
+  *  `Space::request(` *who* `,` *what* `,` *control-opt* `)`
+
+  *  *who* is an identifier for subsequent queries regarding
+    who requested each instance
+
+  *  *what* is the number of threads and how they should be placed
+
+    -  Placement within locality-topology hierarchy; e.g., HWLOC
+
+    -  Compact within a level of hierarchy, or striped across that level;
+       e.g., socket or NUMA region
+
+    -  Granularity of request is core
+
+  *  *control-opt*  optionally specifies whether the instance
+     has a new control thread
+
+    -  *control-opt* includes a control function / closure
+
+    -  The new control thread is a member of the instance
+
+    -  The control function is called by the new control thread
+       and is passed a `const` instance
+
+    -  The instance is **not** returned to the creating control thread
+
+  *  `std::thread` that is not a member of an instance is
+     *hard blocked* on a `std::mutex`
+
+    -  One global mutex or one mutex per thread?
+
+  *  `std::thread` that is a member of an instance is
+     *spinning* waiting for work, or is working
+
+```
+struct StdThread {
+
+  struct Resource ;
+
+  static StdThread request(); // default
+
+  static StdThread request( const std::string & , const Resource & );
+
+  // If the instance can be reserved then
+  // allocate a copy of ControlClosure and invoke
+  //   ControlClosure::operator()( const StdThread instance ) const
+  template< class ControlClosure >
+  static bool request( const std::string & , const Resource &
+                     , const ControlClosure & );
+};
+```
+
+### Relinquishing an Execution Space Instance
+
+  *  De-referencing the last reference-counted instance
+     relinquishes the pool of threads
+
+  *  If a control thread was created for the instance then
+     it is relinquished when that control thread returns
+     from the control function
+
+    -  Requires the reference count to be zero, an error if not
+
+  *  No *forced* relinquish
+
+
+
+## CUDA Associated Execution Space Instances
+
+  *  Only a single CUDA architecture
+
+  *  An instance is a device + stream
+
+  *  A stream is exclusive to an instance
+
+  *  Only a host-side control thread can dispatch work to an instance
+
+  *  Finite number of streams per device
+
+  *  ISSUE:  How to use CUDA `const` memory with multiple streams?
+
+  *  Masking can be mapped to restricting the number of CUDA blocks
+     to the fraction of available resources; e.g., maximum resident blocks
+
+
+### Requesting an Execution Space Instance
+
+  *  `Space::request(` *who* `,` *what* `)`
+
+  *  *who* is an identifier for subsequent queries regarding
+    who requested each instance
+
+  *  *what* is which device, the stream is a requested/relinquished resource
+
+
+```
+struct Cuda {
+
+  struct Resource ;
+
+  static Cuda request();
+
+  static Cuda request( const std::string & , const Resource & );
+};
+```
+
+
diff --git a/lib/kokkos/example/common/VectorImport.hpp b/lib/kokkos/example/common/VectorImport.hpp
index 8ecd74d463c08f3624cf2be2d44b0ca1e4d008ad..48b28f8c2c2556c676993e2b259e68f0eb0abf73 100644
--- a/lib/kokkos/example/common/VectorImport.hpp
+++ b/lib/kokkos/example/common/VectorImport.hpp
@@ -112,13 +112,13 @@ private:
   // rank == 1 or array_layout == LayoutRight
   enum { OK = Kokkos::Impl::StaticAssert<
            ( VectorType::rank == 1 ) ||
-           Kokkos::Impl::is_same< typename VectorType::array_layout , Kokkos::LayoutRight >::value
+           std::is_same< typename VectorType::array_layout , Kokkos::LayoutRight >::value
          >::value };
 
   typedef typename VectorType::HostMirror HostVectorType ;
 
   enum { ReceiveInPlace =
-    Kokkos::Impl::is_same< typename VectorType::memory_space ,
+    std::is_same< typename VectorType::memory_space ,
                            typename HostVectorType::memory_space >::value };
 
   const CommMessageType  recv_msg ;
diff --git a/lib/kokkos/example/feint/ElemFunctor.hpp b/lib/kokkos/example/feint/ElemFunctor.hpp
index 651e34c2eed247f37986886c86f04ce24d76c551..583c4fda12a96a6c061ddb99d13e979a21f01a01 100644
--- a/lib/kokkos/example/feint/ElemFunctor.hpp
+++ b/lib/kokkos/example/feint/ElemFunctor.hpp
@@ -337,11 +337,7 @@ struct LumpElemToNode {
   // In this example we know that the ViewElemValue
   // array specification is < double*[nNode][nValue] >
 
-#if KOKKOS_USING_EXP_VIEW
   enum { value_count = ViewElemValue::dimension::N2 };
-#else
-  enum { value_count = ViewElemValue::shape_type::N2 };
-#endif
 
   ViewNodeValue             m_node_value ; ///< Integrated values at nodes
   ViewElemValue             m_elem_value ; ///< Values apportioned to nodes
diff --git a/lib/kokkos/example/feint/Makefile b/lib/kokkos/example/feint/Makefile
index f198a974c1e34d4014323eb34d03e7aa1f7445ba..9abf51d107c5cfd9ae1184d4cfac606f3f6e1629 100644
--- a/lib/kokkos/example/feint/Makefile
+++ b/lib/kokkos/example/feint/Makefile
@@ -1,30 +1,28 @@
 KOKKOS_PATH = ../..
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+vpath %.cpp ${KOKKOS_SRC_PATH}/example/fixture ${KOKKOS_SRC_PATH}/example/feint
 
-vpath %.cpp ${KOKKOS_PATH}/example/fixture ${KOKKOS_PATH}/example/feint
-
-EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ${KOKKOS_PATH}/example/feint/*.hpp)
+EXAMPLE_HEADERS = $(wildcard $(KOKKOS_SRC_PATH)/example/common/*.hpp ${KOKKOS_SRC_PATH}/example/fixture/*.hpp ${KOKKOS_SRC_PATH}/example/feint/*.hpp)
 
 default: build_all
 	echo "End Build"
-        
-include $(KOKKOS_PATH)/Makefile.kokkos
 
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
 else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?= -lpthread
+  CXX = g++
 endif
 
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?= 
+
+include $(KOKKOS_PATH)/Makefile.kokkos        
+
 KOKKOS_CXXFLAGS +=	\
-	-I${KOKKOS_PATH}/example/common	\
-	-I${KOKKOS_PATH}/example/fixture	\
-	-I${KOKKOS_PATH}/example/feint
+	-I${KOKKOS_SRC_PATH}/example/common	\
+	-I${KOKKOS_SRC_PATH}/example/fixture	\
+	-I${KOKKOS_SRC_PATH}/example/feint
 
 EXE_EXAMPLE_FEINT = KokkosExample_Feint
 OBJ_EXAMPLE_FEINT = BoxElemPart.o main.o
diff --git a/lib/kokkos/example/fenl/Makefile b/lib/kokkos/example/fenl/Makefile
index 5d8e6fd3034ec7c20044552a5688fc6751e374fb..24a0e61c18c4ce9efa1568534cfb4ad8bfccde9a 100644
--- a/lib/kokkos/example/fenl/Makefile
+++ b/lib/kokkos/example/fenl/Makefile
@@ -10,22 +10,18 @@ EXAMPLE_HEADERS = $(wildcard $(SRC_DIR)/../common/*.hpp ${SRC_DIR}/../fixture/*.
 default: build_all
 	echo "End Build"
 
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-# KOKKOS_INTERNAL_USE_CUDA is not exported to installed Makefile.kokkos
-# use KOKKOS_DEVICE here
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
 else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?= -lpthread
+  CXX = g++
 endif
 
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?=
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 KOKKOS_CXXFLAGS +=	\
 	-I${SRC_DIR}/../common	\
 	-I${SRC_DIR}/../fixture	\
diff --git a/lib/kokkos/example/fenl/fenl_impl.hpp b/lib/kokkos/example/fenl/fenl_impl.hpp
index 64070ce55fdc1cf7b94d631a0f29b32eecfab357..15583c10e9f5568e921d838284aa28cc8521f3f4 100644
--- a/lib/kokkos/example/fenl/fenl_impl.hpp
+++ b/lib/kokkos/example/fenl/fenl_impl.hpp
@@ -192,7 +192,7 @@ Perf fenl(
 
   //------------------------------------
 
-  const int print_flag = use_print && Kokkos::Impl::is_same< Kokkos::HostSpace , typename Space::memory_space >::value ;
+  const int print_flag = use_print && std::is_same< Kokkos::HostSpace , typename Space::memory_space >::value ;
 
   int comm_rank ;
   int comm_size ;
diff --git a/lib/kokkos/example/fixture/Makefile b/lib/kokkos/example/fixture/Makefile
index 990f4f18e7d420f2cb7c991ba2d9732f50ef1c56..5e684e344056cde31aec46c2a088e39c1c3bc2f9 100644
--- a/lib/kokkos/example/fixture/Makefile
+++ b/lib/kokkos/example/fixture/Makefile
@@ -1,29 +1,27 @@
 KOKKOS_PATH = ../..
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+vpath %.cpp ${KOKKOS_SRC_PATH}/example/fixture
 
-vpath %.cpp ${KOKKOS_PATH}/example/fixture
-
-EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp )
+EXAMPLE_HEADERS = $(wildcard $(KOKKOS_SRC_PATH)/example/common/*.hpp ${KOKKOS_SRC_PATH}/example/fixture/*.hpp )
 
 default: build_all
 	echo "End Build"
-        
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
+
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
 else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?= -lpthread
+  CXX = g++
 endif
 
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?=
+
+include $(KOKKOS_PATH)/Makefile.kokkos        
+
 KOKKOS_CXXFLAGS +=	\
-	-I${KOKKOS_PATH}/example/common	\
-	-I${KOKKOS_PATH}/example/fixture
+	-I${KOKKOS_SRC_PATH}/example/common	\
+	-I${KOKKOS_SRC_PATH}/example/fixture
 
 EXE_EXAMPLE_FIXTURE = KokkosExample_Fixture
 OBJ_EXAMPLE_FIXTURE = Main.o TestFixture.o BoxElemPart.o
diff --git a/lib/kokkos/example/global_2_local_ids/Makefile b/lib/kokkos/example/global_2_local_ids/Makefile
index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644
--- a/lib/kokkos/example/global_2_local_ids/Makefile
+++ b/lib/kokkos/example/global_2_local_ids/Makefile
@@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
 default: build
 	echo "Start Build"
 
-# use installed Makefile.kokkos
-include $(KOKKOS_PATH)/Makefile.kokkos
-
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = $(NVCC_WRAPPER)
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS = 
-EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "Cuda,OpenMP"
-#KOKKOS_ARCH = "SNB,Kepler35"
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
+  EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
 else
-CXX = g++
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS =  
-EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "OpenMP"
-#KOKKOS_ARCH = "SNB"
+  CXX = g++
+  EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
 endif
 
+CXXFLAGS = -O3 -I$(SRC_DIR)
+LINK ?= $(CXX)
+LDFLAGS ?=
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 DEPFLAGS = -M
 
 LIB =
diff --git a/lib/kokkos/example/grow_array/Makefile b/lib/kokkos/example/grow_array/Makefile
index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644
--- a/lib/kokkos/example/grow_array/Makefile
+++ b/lib/kokkos/example/grow_array/Makefile
@@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
 default: build
 	echo "Start Build"
 
-# use installed Makefile.kokkos
-include $(KOKKOS_PATH)/Makefile.kokkos
-
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = $(NVCC_WRAPPER)
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS = 
-EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "Cuda,OpenMP"
-#KOKKOS_ARCH = "SNB,Kepler35"
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
+  EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
 else
-CXX = g++
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS =  
-EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "OpenMP"
-#KOKKOS_ARCH = "SNB"
+  CXX = g++
+  EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
 endif
 
+CXXFLAGS = -O3 -I$(SRC_DIR)
+LINK ?= $(CXX)
+LDFLAGS ?=
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 DEPFLAGS = -M
 
 LIB =
diff --git a/lib/kokkos/example/ichol/Makefile b/lib/kokkos/example/ichol/Makefile
deleted file mode 100644
index 57e972f042d94c337e8d6b73fffcec2e0d40ad90..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/Makefile
+++ /dev/null
@@ -1,63 +0,0 @@
-SCOTCH_PATH = /home/hcedwar/scotch/6.0.0
-KOKKOS_PATH = ../..
-
-vpath %.cpp ${KOKKOS_PATH}/example/ichol/src ${KOKKOS_PATH}/example/ichol/example 
-
-EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/ichol/src/*.hpp ${KOKKOS_PATH}/example/ichol/example/*.hpp )
-
-default: build_all
-	echo "End Build"
-        
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
-else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?= -lpthread
-endif
-
-KOKKOS_CXXFLAGS +=	\
-	-I${KOKKOS_PATH}/example/ichol/src	\
-	-I${KOKKOS_PATH}/example/ichol/example	\
-	-I${SCOTCH_PATH}/include
-
-EXE_EXAMPLE_ICHOL_THREADS = KokkosExample_ichol_threads
-OBJ_EXAMPLE_ICHOL_THREADS = example_chol_performance_device_pthread.o
-
-EXE_EXAMPLE_ICHOL_CUDA = KokkosExample_ichol_cuda
-OBJ_EXAMPLE_ICHOL_CUDA = example_chol_performance_device_cuda.o
-
-TARGETS = $(EXE_EXAMPLE_ICHOL_THREADS) $(EXE_EXAMPLE_ICHOL_CUDA)
-
-#TEST_TARGETS =
-
-$(EXE_EXAMPLE_ICHOL_THREADS) : $(OBJ_EXAMPLE_ICHOL_THREADS) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) \
-	$(OBJ_EXAMPLE_ICHOL_THREADS) $(KOKKOS_LIBS) $(LIB) \
-	-L${SCOTCH_PATH}/lib -lscotch  -lscotcherr  -lscotcherrexit \
-	-o $(EXE_EXAMPLE_ICHOL_THREADS)
-
-$(EXE_EXAMPLE_ICHOL_CUDA) : $(OBJ_EXAMPLE_ICHOL_CUDA) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) \
-	$(OBJ_EXAMPLE_ICHOL_CUDA) $(KOKKOS_LIBS) $(LIB) \
-	-L${SCOTCH_PATH}/lib -lscotch  -lscotcherr  -lscotcherrexit \
-	-o $(EXE_EXAMPLE_ICHOL_CUDA)
-
-build_all : $(TARGETS)
-
-test : build_all
-
-clean: kokkos-clean
-	rm -f *.o $(TARGETS)
-
-# Compilation rules
-
-%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
-
diff --git a/lib/kokkos/example/ichol/example/example_chol_performance_device.hpp b/lib/kokkos/example/ichol/example/example_chol_performance_device.hpp
deleted file mode 100644
index ca819e4f97028eb0782c7e6c5638945d40f7597b..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/example/example_chol_performance_device.hpp
+++ /dev/null
@@ -1,240 +0,0 @@
-#pragma once
-#ifndef __EXAMPLE_CHOL_PERFORMANCE_DEVICE_HPP__
-#define __EXAMPLE_CHOL_PERFORMANCE_DEVICE_HPP__
-
-#include <Kokkos_Core.hpp>
-#include <impl/Kokkos_Timer.hpp>
-
-#include "util.hpp"
-
-#include "crs_matrix_base.hpp"
-#include "crs_matrix_view.hpp"
-#include "crs_row_view.hpp"
-
-#include "graph_helper_scotch.hpp"
-#include "symbolic_factor_helper.hpp"
-#include "crs_matrix_helper.hpp"
-
-#include "task_view.hpp"
-
-#include "task_factory.hpp"
-
-#include "chol.hpp"
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<typename ValueType,
-           typename OrdinalType,
-           typename SizeType = OrdinalType,
-           typename SpaceType = void>
-  int exampleCholPerformanceDevice(const string file_input,
-                                   const int treecut,
-                                   const int prunecut,
-                                   const int seed,
-                                   const int nthreads,
-                                   const int max_task_dependence,
-                                   const int max_concurrency,
-                                   const int team_size,
-                                   const int fill_level,
-                                   const int league_size,
-                                   const bool skip_serial,
-                                   const bool verbose) {
-    typedef ValueType   value_type;
-    typedef OrdinalType ordinal_type;
-    typedef SizeType    size_type;
-    typedef typename
-       Kokkos::Impl::is_space< SpaceType >::host_mirror_space::execution_space
-         HostSpaceType ;
-
-    typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
-      Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;
-
-    typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType>
-      CrsMatrixBaseType;
-
-    typedef CrsMatrixBase<value_type,ordinal_type,size_type,HostSpaceType>
-      CrsMatrixBaseHostType;
-
-    typedef Kokkos::MemoryUnmanaged MemoryUnmanaged ;
-
-    typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryUnmanaged >
-      CrsMatrixNestedType;
-
-
-    typedef GraphHelper_Scotch<CrsMatrixBaseHostType> GraphHelperType;
-    typedef SymbolicFactorHelper<CrsMatrixBaseHostType> SymbolicFactorHelperType;
-
-    typedef CrsMatrixView<CrsMatrixNestedType> CrsMatrixViewType;
-    typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;
-
-    typedef CrsMatrixBase<CrsTaskViewType,ordinal_type,size_type,SpaceType> CrsHierMatrixBaseType;
-
-    typedef CrsMatrixView<CrsHierMatrixBaseType> CrsHierMatrixViewType;
-    typedef TaskView<CrsHierMatrixViewType,TaskFactoryType> CrsHierTaskViewType;
-
-    int r_val = 0;
-
-    Kokkos::Timer timer;
-    double
-      t_import = 0.0,
-      t_reorder = 0.0,
-      t_symbolic = 0.0,
-      t_flat2hier = 0.0,
-      t_factor_task = 0.0;
-
-    cout << "CholPerformanceDevice:: import input file = " << file_input << endl;
-    CrsMatrixBaseHostType AA("AA");
-    {
-      timer.reset();
-
-      ifstream in;
-      in.open(file_input);
-      if (!in.good()) {
-        cout << "Failed in open the file: " << file_input << endl;
-        return ++r_val;
-      }
-      AA.importMatrixMarket(in);
-
-      t_import = timer.seconds();
-
-      if (verbose) {
-        AA.showMe( std::cout );
-        std::cout << endl;
-      }
-    }
-    cout << "CholPerformanceDevice:: import input file::time = " << t_import << endl;
-
-    cout << "CholPerformanceDevice:: reorder the matrix" << endl;
-    CrsMatrixBaseHostType PA("Permuted AA");
-
-    // '*_UU' is the permuted base upper triangular matrix
-    CrsMatrixBaseHostType host_UU("host_UU");
-    CrsMatrixBaseType     device_UU("UU");
-    CrsHierMatrixBaseType device_HU("HU");;
-
-    // typename CrsMatrixBaseHostType host_UU("host_UU");
-
-    {
-      typename GraphHelperType::size_type_array rptr("Graph::RowPtrArray", AA.NumRows() + 1);
-      typename GraphHelperType::ordinal_type_array cidx("Graph::ColIndexArray", AA.NumNonZeros());
-
-      AA.convertGraph(rptr, cidx);
-      GraphHelperType S("ScotchHelper",
-                        AA.NumRows(),
-                        rptr,
-                        cidx,
-                        seed);
-      {
-        timer.reset();
-
-        S.computeOrdering(treecut, 0);
-        S.pruneTree(prunecut);
-
-        PA.copy(S.PermVector(), S.InvPermVector(), AA);
-
-        t_reorder = timer.seconds();
-
-        if (verbose) {
-          S.showMe( std::cout );
-          std::cout << std::endl ;
-          PA.showMe( std::cout );
-          std::cout << std::endl ;
-        }
-      }
-
-      // Symbolic factorization adds non-zero entries
-      // for factorization levels.
-      // Runs on the host process and currently requires std::sort.
-
-      cout << "CholPerformanceDevice:: reorder the matrix::time = " << t_reorder << endl;
-      {
-        SymbolicFactorHelperType F(PA, league_size);
-        timer.reset();
-        F.createNonZeroPattern(fill_level, Uplo::Upper, host_UU);
-        t_symbolic = timer.seconds();
-        cout << "CholPerformanceDevice:: AA (nnz) = " << AA.NumNonZeros() << ", host_UU (nnz) = " << host_UU.NumNonZeros() << endl;
-
-        if (verbose) {
-          F.showMe( std::cout );
-          std::cout << std::endl ;
-          host_UU.showMe( std::cout );
-          std::cout << std::endl ;
-        }
-      }
-      cout << "CholPerformanceDevice:: symbolic factorization::time = " << t_symbolic << endl;
-
-    //----------------------------------------------------------------------
-    // Allocate device_UU conformal to host_UU 
-    // and deep_copy host_UU arrays to device_UU arrays.
-    // Set up device_HU referencing blocks of device_UU
-
-      {
-        timer.reset();
-
-        device_UU.copy( host_UU );
-
-        CrsMatrixHelper::flat2hier(Uplo::Upper, device_UU, device_HU,
-                                   S.NumBlocks(),
-                                   S.RangeVector(),
-                                   S.TreeVector());
-
-        // Filling non-zero block matrixes' row ranges within block view.
-        // This is performed entirely in the 'device_HU' space.
-
-        CrsMatrixHelper::fillRowViewArray( device_HU );
-
-        t_flat2hier = timer.seconds();
-
-        cout << "CholPerformanceDevice:: Hier (dof, nnz) = " << device_HU.NumRows() << ", " << device_HU.NumNonZeros() << endl;
-      }
-      cout << "CholPerformanceDevice:: copy base matrix and construct hierarchical matrix::time = " << t_flat2hier << endl;
-    }
-
-    cout << "CholPerformanceDevice:: max concurrency = " << max_concurrency << endl;
-
-    const size_t max_task_size = 4*sizeof(CrsTaskViewType)+128;
-    cout << "CholPerformanceDevice:: max task size   = " << max_task_size << endl;
-
-    //----------------------------------------------------------------------
-    // From here onward all work is on the device.
-    //----------------------------------------------------------------------
-
-    {
-      typename TaskFactoryType::policy_type policy(max_concurrency,
-                                                   max_task_size,
-                                                   max_task_dependence,
-                                                   team_size);
-
-      cout << "CholPerformanceDevice:: ByBlocks factorize the matrix:: team_size = " << team_size << endl;
-      CrsHierTaskViewType H( device_HU );
-      {
-        timer.reset();
-        {
-          // auto future = policy.proc_create_team(Chol<Uplo::Upper,AlgoChol::ByBlocks>::
-          auto future = policy.proc_create_team(Chol<Uplo::Upper,AlgoChol::ByBlocks,Variant::Two>::
-                                                TaskFunctor<CrsHierTaskViewType>(policy,H), 0);
-          policy.spawn(future);
-          Kokkos::Experimental::wait(policy);
-        }
-        t_factor_task += timer.seconds();
-
-        cout << "CholPerformanceDevice:: policy.allocated_task_count = "
-             << policy.allocated_task_count()
-             << endl ;
-
-        if (verbose) {
-          host_UU.copy( device_UU );
-          host_UU.showMe( std::cout );
-          std::cout << endl;
-        }
-      }
-      cout << "CholPerformanceDevice:: ByBlocks factorize the matrix::time = " << t_factor_task << endl;
-    }
-
-    return r_val;
-  }
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp b/lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp
deleted file mode 100644
index 3a0df586b5af15a9c56582d216ecac6e5221853d..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <Kokkos_Core.hpp>
-
-#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp>
-
-using namespace std;
-
-typedef double value_type;
-typedef int    ordinal_type;
-typedef int    size_type;
-
-#include "example_chol_performance_device.hpp"
-
-using namespace Tacho;
-
-int main (int argc, char *argv[]) {
-
-  string file_input = "test.mtx";                                                                             
-  int nthreads = 1;                                                                                           
-  int max_task_dependence = 3;                                                                                
-  int max_concurrency = 1024;                                                                                 
-  int team_size = 1;                                                                                          
-  int fill_level = 0;
-  int treecut = 0;
-  int prunecut = 0;
-  int seed = 0;
-  int league_size = 1;                                                                                        
-  bool verbose = false;                                                                                       
-  for (int i=0;i<argc;++i) {                                                                                  
-    if ((strcmp(argv[i],"--file-input")         ==0)) { file_input          = argv[++i];       continue;}     
-    if ((strcmp(argv[i],"--nthreads")           ==0)) { nthreads            = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--max-task-dependence")==0)) { max_task_dependence = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--max-concurrency")    ==0)) { max_concurrency     = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--team-size")          ==0)) { team_size           = atoi(argv[++i]); continue;}     
-
-    if ((strcmp(argv[i],"--fill-level")         ==0)) { fill_level          = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--league-size")        ==0)) { league_size         = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--treecut")            ==0)) { treecut             = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--prunecut")           ==0)) { prunecut            = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--seed")               ==0)) { seed                = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--enable-verbose")     ==0)) { verbose             = true;            continue;}     
-  }                                                                                                           
-
-  int r_val = 0;
-  {
-    typedef Kokkos::Cuda exec_space;
-
-    Kokkos::DefaultHostExecutionSpace::initialize(nthreads);
-
-    exec_space::initialize();
-    exec_space::print_configuration(cout, true);
-
-    r_val = exampleCholPerformanceDevice
-      <value_type,ordinal_type,size_type,exec_space>
-      (file_input,
-       treecut,
-       prunecut,
-       seed,
-       nthreads,
-       max_task_dependence, max_concurrency, team_size,
-       fill_level, league_size,
-       (nthreads != 1), // skip_serial
-       verbose);
-
-    exec_space::finalize();
-
-    Kokkos::DefaultHostExecutionSpace::finalize();
-  }
-
-  return r_val;
-}
diff --git a/lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp b/lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp
deleted file mode 100644
index 68f520cf6620888c2a8de2f8cabe06a5b9e8b607..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-#include <Kokkos_Core.hpp>
-
-#include <Kokkos_Threads.hpp>
-#include <Threads/Kokkos_Threads_TaskPolicy.hpp>
-
-using namespace std;
-
-typedef double value_type;
-typedef int    ordinal_type;
-typedef int    size_type;
-
-typedef Kokkos::Threads exec_space;
-
-#include "example_chol_performance_device.hpp"
-
-using namespace Tacho;
-
-int main (int argc, char *argv[]) {
-
-  string file_input = "test.mtx";                                                                             
-  int nthreads = 1;                                                                                           
-  int max_task_dependence = 3;                                                                                
-  int max_concurrency = 1024;                                                                                 
-  int team_size = 1;                                                                                          
-  int fill_level = 0;
-  int treecut = 0;
-  int prunecut = 0;
-  int seed = 0;
-  int league_size = 1;                                                                                        
-  bool verbose = false;                                                                                       
-  for (int i=0;i<argc;++i) {                                                                                  
-    if ((strcmp(argv[i],"--file-input")         ==0)) { file_input          = argv[++i];       continue;}     
-    if ((strcmp(argv[i],"--nthreads")           ==0)) { nthreads            = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--max-task-dependence")==0)) { max_task_dependence = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--max-concurrency")    ==0)) { max_concurrency     = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--team-size")          ==0)) { team_size           = atoi(argv[++i]); continue;}     
-
-    if ((strcmp(argv[i],"--fill-level")         ==0)) { fill_level          = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--league-size")        ==0)) { league_size         = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--treecut")            ==0)) { treecut             = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--prunecut")           ==0)) { prunecut            = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--seed")               ==0)) { seed                = atoi(argv[++i]); continue;}     
-    if ((strcmp(argv[i],"--enable-verbose")     ==0)) { verbose             = true;            continue;}     
-  }                                                                                                           
-
-  int r_val = 0;
-  {
-    exec_space::initialize(nthreads);
-    exec_space::print_configuration(cout, true);
-
-    r_val = exampleCholPerformanceDevice
-      <value_type,ordinal_type,size_type,exec_space>
-      (file_input,
-       treecut,
-       prunecut,
-       seed,
-       nthreads,
-       max_task_dependence, max_concurrency, team_size,
-       fill_level, league_size,
-       (nthreads != 1), // skip_serial
-       verbose);
-
-    exec_space::finalize();
-  }
-
-  return r_val;
-}
diff --git a/lib/kokkos/example/ichol/src/chol.hpp b/lib/kokkos/example/ichol/src/chol.hpp
deleted file mode 100644
index e8aa4e9189ffb607c91cc2b86811084b69a45393..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/chol.hpp
+++ /dev/null
@@ -1,92 +0,0 @@
-#pragma once
-#ifndef __CHOL_HPP__
-#define __CHOL_HPP__
-
-/// \file chol.hpp
-/// \brief Incomplete Cholesky factorization front interface.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-#include "control.hpp"
-#include "partition.hpp"
-
-namespace Tacho { 
-
-  using namespace std;
-
-  // tasking interface
-  // * default behavior is for non-by-blocks tasks
-  // * control is only used for by-blocks algorithms
-  // ===============================================
-  template<int ArgUplo, int ArgAlgo, 
-           int ArgVariant = Variant::One,                  
-           template<int,int> class ControlType = Control>  
-  class Chol {
-  public:
-    
-    // function interface
-    // ==================
-    template<typename ExecViewType>
-    KOKKOS_INLINE_FUNCTION
-    static int invoke(typename ExecViewType::policy_type &policy, 
-                      const typename ExecViewType::policy_type::member_type &member, 
-                      typename ExecViewType::matrix_type &A);
-
-    // task-data parallel interface
-    // ============================
-    template<typename ExecViewType>
-    class TaskFunctor {
-    public:
-      typedef typename ExecViewType::policy_type policy_type;
-      typedef typename policy_type::member_type member_type;
-      typedef int value_type;
-      
-    private:
-      typename ExecViewType::matrix_type _A;
-      
-      policy_type _policy;
-      
-    public:
-      KOKKOS_INLINE_FUNCTION
-      TaskFunctor(const policy_type & P ,
-                  const typename ExecViewType::matrix_type & A)
-        : _A(A),
-          _policy(P)
-      { } 
-      
-      string Label() const { return "Chol"; }
-      
-      // task execution
-      KOKKOS_INLINE_FUNCTION
-      void apply(value_type &r_val) {
-        r_val = Chol::invoke<ExecViewType>(_policy, _policy.member_single(), _A);
-      }
-
-      // task-data execution
-      KOKKOS_INLINE_FUNCTION
-      void apply(const member_type &member, value_type &r_val) {
-
-        const int result = Chol::invoke<ExecViewType>(_policy, member, _A);
-
-        if ( 0 == member.team_rank() ) { r_val = result ; }
-
-      }
-
-    };
-
-  };
-}
-
-
-// unblocked version blas operations
-#include "scale.hpp"
-
-// blocked version blas operations
-#include "gemm.hpp"
-#include "trsm.hpp"
-#include "herk.hpp"
-
-// cholesky
-#include "chol_u.hpp"
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/chol_u.hpp b/lib/kokkos/example/ichol/src/chol_u.hpp
deleted file mode 100644
index 0465ef8f35c8574189c767b6f97dfc7a6344f2cb..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/chol_u.hpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#pragma once
-#ifndef __CHOL_U_HPP__
-#define __CHOL_U_HPP__
-
-/// \file chol_u.hpp
-/// \brief Upper Cholesky factorization variations
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-// testing task-data parallelism
-// #include "chol_u_unblocked_dummy.hpp"
-
-// flame style implementation
-//#include "chol_unblocked.hpp"  
-//#include "chol_u_blocked.hpp"
-
-// triple for loop
-#include "chol_u_unblocked_opt1.hpp"
-#include "chol_u_unblocked_opt2.hpp"
-
-// partitioned block algorithms: see control.hpp
-#include "chol_u_right_look_by_blocks.hpp"
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp b/lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp
deleted file mode 100644
index e21bafa9f1db5e9dda1a0e24f21a4552f011d27a..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp
+++ /dev/null
@@ -1,394 +0,0 @@
-#pragma once
-#ifndef __CHOL_U_RIGHT_LOOK_BY_BLOCKS_HPP__
-#define __CHOL_U_RIGHT_LOOK_BY_BLOCKS_HPP__
-
-/// \file chol_u_right_look_by_blocks.hpp
-/// \brief Cholesky factorization by-blocks
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-/// The Partitioned-Block Matrix (PBM) is sparse and a block itself is a view of a sparse matrix. 
-/// The algorithm generates tasks with a given sparse block matrix structure.
-
-// basic utils
-#include "util.hpp"
-#include "control.hpp"
-#include "partition.hpp"
-
-namespace Tacho { 
-  
-  using namespace std;
-
-  template< typename CrsTaskViewType >
-  KOKKOS_INLINE_FUNCTION
-  int releaseFutures( typename CrsTaskViewType::matrix_type & A )
-    {
-      typedef typename CrsTaskViewType::ordinal_type      ordinal_type;
-      typedef typename CrsTaskViewType::row_view_type     row_view_type;
-      typedef typename CrsTaskViewType::future_type       future_type;
-      
-      row_view_type a(A,0);
-      
-      const ordinal_type nnz = a.NumNonZeros();
-
-      for (ordinal_type j=0;j<nnz;++j) {
-        a.Value(j).setFuture( future_type() );
-      }
-
-      return nnz ;
-    }
-  
-  // ========================================
-  // detailed workflow of by-blocks algorithm
-  // ========================================
-  template<int ArgVariant, 
-           template<int,int> class ControlType,
-           typename CrsTaskViewType>
-  class CholUpperRightLookByBlocks {
-  public:
-    KOKKOS_INLINE_FUNCTION
-    static int genScalarTask(typename CrsTaskViewType::policy_type &policy,
-                             typename CrsTaskViewType::matrix_type &A) {
-      typedef typename CrsTaskViewType::value_type        value_type;
-      typedef typename CrsTaskViewType::row_view_type     row_view_type;
-      
-      typedef typename CrsTaskViewType::future_type       future_type;
-      typedef typename CrsTaskViewType::task_factory_type task_factory_type;
-      
-      row_view_type a(A, 0); 
-      value_type &aa = a.Value(0);
-      
-      // construct a task
-      future_type f = task_factory_type::create(policy,
-                                                typename Chol<Uplo::Upper,
-                                                CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Chol)>
-                                                ::template TaskFunctor<value_type>(policy,aa));
-      
-
-if ( false ) {
- printf("Chol [%d +%d)x[%d +%d) spawn depend %d\n"
-       , aa.OffsetRows()
-       , aa.NumRows()
-       , aa.OffsetCols()
-       , aa.NumCols()
-       , int( ! aa.Future().is_null() )
-       );
-}
-
-      // manage dependence
-      task_factory_type::addDependence(policy, f, aa.Future());
-      aa.setFuture(f);
-
-      // spawn a task
-      task_factory_type::spawn(policy, f, true /* high priority */ );
-      
-      return 1;
-    }
-    
-    KOKKOS_INLINE_FUNCTION
-    static int genTrsmTasks(typename CrsTaskViewType::policy_type &policy,
-                            typename CrsTaskViewType::matrix_type &A,
-                            typename CrsTaskViewType::matrix_type &B) {
-      typedef typename CrsTaskViewType::ordinal_type      ordinal_type;
-      typedef typename CrsTaskViewType::row_view_type     row_view_type;
-      typedef typename CrsTaskViewType::value_type        value_type;
-
-      typedef typename CrsTaskViewType::future_type       future_type;
-      typedef typename CrsTaskViewType::task_factory_type task_factory_type;
-      
-      row_view_type a(A,0), b(B,0); 
-      value_type &aa = a.Value(0);
-
-if ( false ) {
-  printf("genTrsmTasks after aa.Future().reference_count = %d\n"
-        , aa.Future().reference_count());
-}
-      const ordinal_type nnz = b.NumNonZeros();
-      for (ordinal_type j=0;j<nnz;++j) {
-        typedef typename
-           Trsm< Side::Left,Uplo::Upper,Trans::ConjTranspose,
-                 CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Trsm)>
-           ::template TaskFunctor<double,value_type,value_type>
-             FunctorType ;
-
-        value_type &bb = b.Value(j);
-        
-        future_type f = task_factory_type
-          ::create(policy, FunctorType(policy,Diag::NonUnit, 1.0, aa, bb));
-        
-if ( false ) {
- printf("Trsm [%d +%d)x[%d +%d) spawn depend %d %d\n"
-       , bb.OffsetRows()
-       , bb.NumRows()
-       , bb.OffsetCols()
-       , bb.NumCols()
-       , int( ! aa.Future().is_null() )
-       , int( ! bb.Future().is_null() )
-       );
-}
-
-        // trsm dependence
-        task_factory_type::addDependence(policy, f, aa.Future());
-        
-        // self
-        task_factory_type::addDependence(policy, f, bb.Future());
-        
-        // place task signature on b
-        bb.setFuture(f);
-        
-        // spawn a task
-        task_factory_type::spawn(policy, f, true /* high priority */);              
-      }
-
-if ( false ) {
-  printf("genTrsmTasks after aa.Future().reference_count = %d\n"
-        , aa.Future().reference_count());
-}
-      
-      return nnz ;
-    }
-    
-    KOKKOS_INLINE_FUNCTION
-    static int genHerkTasks(typename CrsTaskViewType::policy_type &policy,
-                            typename CrsTaskViewType::matrix_type &A,
-                            typename CrsTaskViewType::matrix_type &C) {
-      typedef typename CrsTaskViewType::ordinal_type      ordinal_type;
-      typedef typename CrsTaskViewType::value_type        value_type;
-      typedef typename CrsTaskViewType::row_view_type     row_view_type;
-      
-      typedef typename CrsTaskViewType::future_type       future_type;
-      typedef typename CrsTaskViewType::task_factory_type task_factory_type;
-      
-      // case that X.transpose, A.no_transpose, Y.no_transpose
-      
-      row_view_type a(A,0), c; 
-      
-      const ordinal_type nnz = a.NumNonZeros();
-      ordinal_type herk_count = 0 ; 
-      ordinal_type gemm_count = 0 ; 
-
-      // update herk
-      for (ordinal_type i=0;i<nnz;++i) {
-        const ordinal_type row_at_i = a.Col(i);
-        value_type &aa = a.Value(i);
-        
-        c.setView(C, row_at_i);
-        
-        ordinal_type idx = 0;
-        for (ordinal_type j=i;j<nnz && (idx > -2);++j) {
-          const ordinal_type col_at_j = a.Col(j);
-          value_type &bb = a.Value(j);
-          
-          if (row_at_i == col_at_j) {
-            idx = c.Index(row_at_i, idx);
-            if (idx >= 0) {
-              ++herk_count ;
-              value_type &cc = c.Value(idx);
-              future_type f = task_factory_type
-                ::create(policy, 
-                         typename Herk<Uplo::Upper,Trans::ConjTranspose,
-                         CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Herk)>
-                         ::template TaskFunctor<double,value_type,value_type>(policy,-1.0, aa, 1.0, cc));
-            
-
-if ( false ) {
- printf("Herk [%d +%d)x[%d +%d) spawn %d %d\n"
-       , cc.OffsetRows()
-       , cc.NumRows()
-       , cc.OffsetCols()
-       , cc.NumCols()
-       , int( ! aa.Future().is_null() )
-       , int( ! cc.Future().is_null() )
-       );
-}
-
-              // dependence
-              task_factory_type::addDependence(policy, f, aa.Future());              
-            
-              // self
-              task_factory_type::addDependence(policy, f, cc.Future());
-            
-              // place task signature on y
-              cc.setFuture(f);
-
-              // spawn a task
-              task_factory_type::spawn(policy, f);
-            }
-          } else {
-            idx = c.Index(col_at_j, idx);
-            if (idx >= 0) {
-              ++gemm_count ;
-              value_type &cc = c.Value(idx);
-              future_type f = task_factory_type
-                ::create(policy, 
-                         typename Gemm<Trans::ConjTranspose,Trans::NoTranspose,
-                         CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Gemm)>
-                         ::template TaskFunctor<double,value_type,value_type,value_type>(policy,-1.0, aa, bb, 1.0, cc));
-            
-
-if ( false ) {
- printf("Gemm [%d +%d)x[%d +%d) spawn %d %d %d\n"
-       , cc.OffsetRows()
-       , cc.NumRows()
-       , cc.OffsetCols()
-       , cc.NumCols()
-       , int( ! aa.Future().is_null() )
-       , int( ! bb.Future().is_null() )
-       , int( ! cc.Future().is_null() )
-       );
-}
- 
-              // dependence
-              task_factory_type::addDependence(policy, f, aa.Future());
-              task_factory_type::addDependence(policy, f, bb.Future());
-            
-              // self
-              task_factory_type::addDependence(policy, f, cc.Future());
-            
-              // place task signature on y
-              cc.setFuture(f);
-            
-              // spawn a task
-              task_factory_type::spawn(policy, f);
-            }
-          }
-        }
-      }
-
-if ( false ) {
-printf("genHerkTask Herk(%ld) Gemm(%ld)\n",(long)herk_count,(long)gemm_count);
-}
-    
-      return herk_count + gemm_count ;
-    }
-    
-  };
-  
-  // specialization for different task generation in right looking by-blocks algorithm
-  // =================================================================================
-  template<int ArgVariant, template<int,int> class ControlType>
-  class Chol<Uplo::Upper,AlgoChol::RightLookByBlocks,ArgVariant,ControlType> {
-  public:
-
-    // function interface
-    // ==================
-    template<typename ExecViewType>
-    KOKKOS_INLINE_FUNCTION
-    static int invoke(typename ExecViewType::policy_type &policy, 
-                      const typename ExecViewType::policy_type::member_type &member, 
-                      typename ExecViewType::matrix_type & A,
-                      int checkpoint )
-      {
-        typedef typename ExecViewType::row_view_type  row_view_type ;
-
-        enum { CYCLE = 2 };
-
-        typename ExecViewType::matrix_type
-          ATL, ATR,      A00, A01, A02,
-          ABL, ABR,      A10, A11, A12,
-                         A20, A21, A22;
-
-        Part_2x2(A,  ATL, ATR,
-                 /**/ABL, ABR,
-                 checkpoint, checkpoint, Partition::TopLeft);
-
-        int tasks_spawned = 0 ;
-        int futures_released = 0 ;
-
-        for ( int i = 0 ; i < CYCLE && ATL.NumRows() < A.NumRows() ; ++i ) {
-          Part_2x2_to_3x3(ATL, ATR, /**/  A00, A01, A02,
-                          /*******/ /**/  A10, A11, A12,
-                          ABL, ABR, /**/  A20, A21, A22,
-                          1, 1, Partition::BottomRight);
-          // -----------------------------------------------------
-          // Spawning tasks:
-
-          // A11 = chol(A11) : #task = 1
-          tasks_spawned +=
-          CholUpperRightLookByBlocks<ArgVariant,ControlType,ExecViewType>
-            ::genScalarTask(policy, A11);
-          
-          // A12 = inv(triu(A11)') * A12 : #tasks = non-zero row blocks
-          tasks_spawned +=
-          CholUpperRightLookByBlocks<ArgVariant,ControlType,ExecViewType>
-            ::genTrsmTasks(policy, A11, A12);
-
-          // A22 = A22 - A12' * A12 : #tasks = highly variable
-          tasks_spawned +=
-          CholUpperRightLookByBlocks<ArgVariant,ControlType,ExecViewType>
-            ::genHerkTasks(policy, A12, A22);
-
-          // -----------------------------------------------------
-          // Can release futures of A11 and A12 
-
-          futures_released += releaseFutures<ExecViewType>( A11 );
-          futures_released += releaseFutures<ExecViewType>( A12 );
-
-if ( false ) {
-  printf("Chol iteration(%d) task_count(%d) cumulative: spawn(%d) release(%d)\n"
-        , int(ATL.NumRows())
-        , policy.allocated_task_count()
-        , tasks_spawned , futures_released
-        );
-}
-
-          // -----------------------------------------------------
-          Merge_3x3_to_2x2(A00, A01, A02, /**/ ATL, ATR,
-                           A10, A11, A12, /**/ /******/
-                           A20, A21, A22, /**/ ABL, ABR,
-                           Partition::TopLeft);
-
-        }
-      
-      return ATL.NumRows();
-    }
-    
-    // task-data parallel interface
-    // ============================
-    template<typename ExecViewType>
-    class TaskFunctor {
-    public:
-      typedef typename ExecViewType::policy_type  policy_type;
-      typedef typename ExecViewType::future_type  future_type;
-      typedef typename policy_type::member_type   member_type;
-      typedef int value_type;
-      
-    private:
-      typename ExecViewType::matrix_type _A;
-      
-      policy_type _policy;
-      int         _checkpoint ;
-      
-    public:
-      KOKKOS_INLINE_FUNCTION
-      TaskFunctor(const policy_type & P ,
-                  const typename ExecViewType::matrix_type & A)
-        : _A(A),
-          _policy(P),
-          _checkpoint(0)
-      { } 
-      
-      string Label() const { return "Chol"; }
-      
-      // task-data execution
-      KOKKOS_INLINE_FUNCTION
-      void apply(const member_type &member, value_type &r_val)
-      {
-        if (member.team_rank() == 0) {
-          // Clear out previous dependence
-          _policy.clear_dependence( this );
-
-          _checkpoint = Chol::invoke<ExecViewType>(_policy, member, _A, _checkpoint);
-
-          if ( _checkpoint < _A.NumRows() ) _policy.respawn_needing_memory(this);
-
-          r_val = 0 ;
-        }
-        return ;
-      }
-
-    };
-
-  };
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp b/lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp
deleted file mode 100644
index 3bb99c71424f491bbb5bea712475fcac116ad24e..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp
+++ /dev/null
@@ -1,90 +0,0 @@
-#pragma once
-#ifndef __CHOL_U_UNBLOCKED_OPT1_HPP__
-#define __CHOL_U_UNBLOCKED_OPT1_HPP__
-
-/// \file chol_u_unblocked_opt1.hpp
-/// \brief Unblocked incomplete Chloesky factorization.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-#include "partition.hpp"
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<>
-  template<typename CrsExecViewType>
-  KOKKOS_INLINE_FUNCTION
-  int
-  Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::One>
-  ::invoke(typename CrsExecViewType::policy_type &policy,
-           const typename CrsExecViewType::policy_type::member_type &member,
-           typename CrsExecViewType::matrix_type &A) {
-
-    typedef typename CrsExecViewType::value_type        value_type;
-    typedef typename CrsExecViewType::ordinal_type      ordinal_type;
-    typedef typename CrsExecViewType::row_view_type     row_view_type;
-
-    // row_view_type r1t, r2t;
-
-    for (ordinal_type k=0;k<A.NumRows();++k) {
-      //r1t.setView(A, k);
-      row_view_type &r1t = A.RowView(k);
-
-      // extract diagonal from alpha11
-      value_type &alpha = r1t.Value(0);
-
-      if (member.team_rank() == 0) {
-        // if encounter null diag or wrong index, return -(row + 1)
-        if (abs(alpha) == 0.0 || r1t.Col(0) != k)
-          return -(k + 1);
-
-        // error handling should be more carefully designed
-
-        // sqrt on diag
-        // alpha = sqrt(real(alpha));
-        alpha = sqrt(alpha);
-      }
-      member.team_barrier();
-
-      const ordinal_type nnz_r1t = r1t.NumNonZeros();
-
-      if (nnz_r1t) {
-        // inverse scale
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t),
-                             [&](const ordinal_type j) {
-                               r1t.Value(j) /= alpha;
-                             });
-
-        member.team_barrier();
-
-        // hermitian rank update
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t),
-                             [&](const ordinal_type i) {
-                               const ordinal_type row_at_i = r1t.Col(i);
-                               // const value_type   val_at_i = conj(r1t.Value(i));
-                               const value_type   val_at_i = r1t.Value(i);
-                               
-                               //r2t.setView(A, row_at_i);
-                               row_view_type &r2t = A.RowView(row_at_i);
-                               ordinal_type idx = 0;
-                               
-                               for (ordinal_type j=i;j<nnz_r1t && (idx > -2);++j) {
-                                 const ordinal_type col_at_j = r1t.Col(j);
-                                 idx = r2t.Index(col_at_j, idx);
-                                 
-                                 if (idx >= 0) {
-                                   const value_type val_at_j = r1t.Value(j);
-                                   r2t.Value(idx) -= val_at_i*val_at_j;
-                                 }
-                               }
-                             });
-      }
-    }
-    return 0;
-  }
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp b/lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp
deleted file mode 100644
index e7d1dc826235120a84af25ff239fb705c65489f0..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp
+++ /dev/null
@@ -1,154 +0,0 @@
-#pragma once
-#ifndef __CHOL_U_UNBLOCKED_OPT2_HPP__
-#define __CHOL_U_UNBLOCKED_OPT2_HPP__
-
-/// \file chol_u_unblocked_opt2.hpp
-/// \brief Unblocked incomplete Chloesky factorization; version for data parallel sharing L1 cache.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-#include "partition.hpp"
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<>
-  template<typename CrsExecViewType>
-  KOKKOS_INLINE_FUNCTION
-  int
-  Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::Two>
-  ::invoke(typename CrsExecViewType::policy_type &policy,
-           const typename CrsExecViewType::policy_type::member_type &member,
-           typename CrsExecViewType::matrix_type &A) {
-
-    typedef typename CrsExecViewType::value_type        value_type;
-    typedef typename CrsExecViewType::ordinal_type      ordinal_type;
-    typedef typename CrsExecViewType::row_view_type     row_view_type;
-
-if ( false && member.team_rank() == 0 ) {
- printf("Chol [%d +%d)x[%d +%d) begin\n"
-       , A.OffsetRows()
-       , A.NumRows()
-       , A.OffsetCols()
-       , A.NumCols()
-       );
-}
-
-    // row_view_type r1t, r2t;
-
-    for (ordinal_type k=0;k<A.NumRows();++k) {
-      //r1t.setView(A, k);
-      row_view_type &r1t = A.RowView(k);
-
-      // extract diagonal from alpha11
-      value_type &alpha = r1t.Value(0);
-
-      if (member.team_rank() == 0) {
-        // if encounter null diag or wrong index, return -(row + 1)
-        if (abs(alpha) == 0.0 || r1t.Col(0) != k)
-          return -(k + 1);
-
-        // error handling should be more carefully designed
-
-        // sqrt on diag
-        // alpha = sqrt(real(alpha));
-        alpha = sqrt(alpha);
-      }
-      member.team_barrier();
-
-
-if ( false && member.team_rank() == 0 ) {
- printf("Chol [%d +%d)x[%d +%d) local row %d\n"
-       , A.OffsetRows()
-       , A.NumRows()
-       , A.OffsetCols()
-       , A.NumCols()
-       , int(k)
-       );
-}
-
-
-      const ordinal_type nnz_r1t = r1t.NumNonZeros();
-
-      if (nnz_r1t) {
-        // inverse scale
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t),
-                             [&](const ordinal_type j) {
-                               r1t.Value(j) /= alpha;
-                             });
-
-        member.team_barrier();
-
-
-if ( false && member.team_rank() == 0 ) {
- printf("Chol [%d +%d)x[%d +%d) local row %d nnz_r1t\n"
-       , A.OffsetRows()
-       , A.NumRows()
-       , A.OffsetCols()
-       , A.NumCols()
-       , int(k)
-       );
-}
-
-        // hermitian rank update
-        for (ordinal_type i=1;i<nnz_r1t;++i) {
-          const ordinal_type row_at_i = r1t.Col(i);
-          // const value_type   val_at_i = conj(r1t.Value(i));
-          const value_type   val_at_i = r1t.Value(i);
-
-          //r2t.setView(A, row_at_i);
-          row_view_type &r2t = A.RowView(row_at_i);
-
-          ordinal_type member_idx = 0 ;
-
-          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, i, nnz_r1t),
-                               [&](const ordinal_type j) {
-                                 if (member_idx > -2) {
-                                   const ordinal_type col_at_j = r1t.Col(j);
-                                   member_idx = r2t.Index(col_at_j, member_idx);
-                                   if (member_idx >= 0) {
-                                     const value_type   val_at_j = r1t.Value(j);
-                                     r2t.Value(member_idx) -= val_at_i*val_at_j;
-                                   }
-                                 }
-                               });
-        }
-      }
-
-
-if ( false ) {
-member.team_barrier();
-if ( member.team_rank() == 0 ) {
- printf("Chol [%d +%d)x[%d +%d) local row %d end\n"
-       , A.OffsetRows()
-       , A.NumRows()
-       , A.OffsetCols()
-       , A.NumCols()
-       , int(k)
-       );
-}
-}
-
-    }
-
-
-if ( false ) {
-member.team_barrier();
-if ( member.team_rank() == 0 ) {
- printf("Chol [%d +%d)x[%d +%d) end\n"
-       , A.OffsetRows()
-       , A.NumRows()
-       , A.OffsetCols()
-       , A.NumCols()
-       );
-}
-}
-
-
-    return 0;
-  }
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/control.hpp b/lib/kokkos/example/ichol/src/control.hpp
deleted file mode 100644
index bf5efef9fded8685f646d81855469f6f363b1e73..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/control.hpp
+++ /dev/null
@@ -1,110 +0,0 @@
-#pragma once
-#ifndef __CONTROL_HPP__
-#define __CONTROL_HPP__
-
-#include "util.hpp"
-
-/// \file control.hpp
-/// \brief A collection of control trees composing high-level variants of algorithms.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-/// description is a bit wrong
-
-using namespace std;
-
-namespace Tacho {
-
-  // forward declaration for control tree
-  template<int ArgAlgo, int ArgVariant>
-  struct Control {
-    static constexpr int Self[2] = { ArgAlgo, ArgVariant };
-  };
-
-  // ----------------------------------------------------------------------------------
-
-  // - CholByblocks Variant 1
-  // * partitioned block matrix (blocks are sparse)
-  template<> struct Control<AlgoChol::ByBlocks,Variant::One> {
-    // chol var 1 : nested data parallel for is applied in the second inner loop
-    // chol var 2 : nested data parallel for is applied in the most inner loop
-    static constexpr int Chol[2] = { AlgoChol::UnblockedOpt,     Variant::Two };
-    static constexpr int Trsm[2] = { AlgoTrsm::ForFactorBlocked, Variant::One };
-    static constexpr int Herk[2] = { AlgoHerk::ForFactorBlocked, Variant::One };
-    static constexpr int Gemm[2] = { AlgoGemm::ForFactorBlocked, Variant::One };
-  };
-
-  // - CholByBlocks Variant 2
-  // * diagonal blocks have nested dense blocks
-  template<> struct Control<AlgoChol::ByBlocks,Variant::Two> {
-    static constexpr int Chol[2] = { AlgoChol::UnblockedOpt, Variant::One }; 
-    static constexpr int Trsm[2] = { AlgoTrsm::ForFactorBlocked, Variant::One };
-    static constexpr int Herk[2] = { AlgoHerk::ForFactorBlocked, Variant::One };
-    static constexpr int Gemm[2] = { AlgoGemm::ForFactorBlocked, Variant::One };
-  };
-
-  // - CholByBlocks Variant 3
-  // * all blocks have nested dense blocks (full supernodal algorithm)
-  // template<> struct Control<AlgoChol::ByBlocks,Variant::Three> {
-  //   static constexpr int Chol[2] = { AlgoChol::NestedDenseBlock, Variant::One }; 
-  //   static constexpr int Trsm[2] = { AlgoTrsm::NestedDenseBlock, Variant::One };
-  //   static constexpr int Herk[2] = { AlgoHerk::NestedDenseBlock, Variant::One };
-  //   static constexpr int Gemm[2] = { AlgoGemm::NestedDenseBlock, Variant::One };
-  // };
-
-  // - CholByBlocks Variant 4
-  // * diagonal blocks have nested hier dense blocks (hierarchical task scheduling)
-  // template<> struct Control<AlgoChol::ByBlocks,Variant::Four> {
-  //  static constexpr int Chol[2] = { AlgoChol::NestedDenseByBlocks, Variant::One }; 
-  //  static constexpr int Trsm[2] = { AlgoTrsm::ForFactorBlocked,    Variant::One };
-  //  static constexpr int Herk[2] = { AlgoHerk::ForFactorBlocked,    Variant::One };
-  //  static constexpr int Gemm[2] = { AlgoGemm::ForFactorBlocked,    Variant::One };
-  //};
-
-  // - CholByBlocks Variant 5
-  // * diagonal blocks have nested hier dense blocks (hierarchical task scheduling)
-  // template<> struct Control<AlgoChol::ByBlocks,Variant::Four> {
-  //   static constexpr int Chol[2] = { AlgoChol::NestedDenseByBlocks, Variant::One }; 
-  //   static constexpr int Trsm[2] = { AlgoTrsm::NestedDenseByBlocks, Variant::One };
-  //   static constexpr int Herk[2] = { AlgoHerk::NestedDenseByBlocks, Variant::One };
-  //   static constexpr int Gemm[2] = { AlgoGemm::NestedDenseByBlocks, Variant::One };
-  // };
-
-  // ----------------------------------------------------------------------------------
-
-  // - CholNestedDenseBlock
-  // * branch control between sparse and dense operations
-  template<> struct Control<AlgoChol::NestedDenseBlock,Variant::One> {
-    static constexpr int CholSparse[2] = { AlgoChol::UnblockedOpt,   Variant::One };
-    static constexpr int CholDense[2]  = { AlgoChol::ExternalLapack, Variant::One }; 
-  };
-
-  // - CholNestedDenseBlock
-  // * branch control between sparse and dense operations
-  template<> struct Control<AlgoChol::NestedDenseByBlocks,Variant::One> {
-    static constexpr int CholSparse[2]        = { AlgoChol::UnblockedOpt,  Variant::One };
-    static constexpr int CholDenseByBlocks[2] = { AlgoChol::DenseByBlocks, Variant::One }; 
-  };
-
-  // ----------------------------------------------------------------------------------
-
-  // - CholDenseBlock
-  // * dense matrix Cholesky-by-blocks
-  template<> struct Control<AlgoChol::DenseByBlocks,Variant::One> {
-    static constexpr int Chol[2] = { AlgoChol::ExternalLapack, Variant::One };
-    static constexpr int Trsm[2] = { AlgoTrsm::ExternalBlas,   Variant::One };
-    static constexpr int Herk[2] = { AlgoHerk::ExternalBlas,   Variant::One };
-    static constexpr int Gemm[2] = { AlgoGemm::ExternalBlas,   Variant::One };
-  };
-
-  template<> struct Control<AlgoGemm::DenseByBlocks,Variant::One> {
-    static constexpr int Gemm[2] = { AlgoGemm::ExternalBlas, Variant::One };
-  };
-
-  template<> struct Control<AlgoTrsm::DenseByBlocks,Variant::One> {
-    static constexpr int Gemm[2] = { AlgoGemm::ExternalBlas, Variant::One };
-    static constexpr int Trsm[2] = { AlgoTrsm::ExternalBlas, Variant::One };
-  };
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/coo.hpp b/lib/kokkos/example/ichol/src/coo.hpp
deleted file mode 100644
index 977f17e5c5fb2d9ce520548cc04bc15c107a4c60..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/coo.hpp
+++ /dev/null
@@ -1,75 +0,0 @@
-#pragma once
-#ifndef __COO_HPP__
-#define __COO_HPP__
-
-/// \file coo.hpp
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho { 
-  
-  using namespace std;
-
-  /// \class Coo
-  /// \brief Sparse coordinate format; (i, j, val).
-  template<typename CrsMatType>
-  class Coo {
-  public:
-    typedef typename CrsMatType::ordinal_type ordinal_type;
-    typedef typename CrsMatType::value_type   value_type;
-
-  public:
-    ordinal_type _i,_j;
-    value_type _val;
-
-  public:
-    ordinal_type& Row() { return _i;   } 
-    ordinal_type& Col() { return _j;   }
-    value_type&   Val() { return _val; }
-
-    ordinal_type  Row() const { return _i;   } 
-    ordinal_type  Col() const { return _j;   }
-    value_type    Val() const { return _val; }
-    
-    Coo() {}
-
-    Coo(const ordinal_type i, 
-        const ordinal_type j, 
-        const value_type val) 
-      : _i(i),
-        _j(j),
-        _val(val) 
-    { }
-
-    Coo(const Coo& b)
-      : _i(b._i),
-        _j(b._j),
-        _val(b._val) 
-    { }
-
-    Coo<CrsMatType>& operator=(const Coo<CrsMatType> &y) {
-      this->_i = y._i;
-      this->_j = y._j;
-      this->_val = y._val;
-
-      return *this;
-    }
-
-    /// \brief Compare "less" index i and j only.
-    bool operator<(const Coo<CrsMatType> &y) const {
-      ordinal_type r_val = (this->_i - y._i);
-      return (r_val == 0 ? this->_j < y._j : r_val < 0);
-    }  
-    
-    /// \brief Compare "equality" only index i and j.
-    bool operator==(const Coo<CrsMatType> &y) const {
-      return (this->_i == y._i) && (this->_j == y._j);
-    }  
- 
-    /// \brief Compare "in-equality" only index i and j.   
-    bool operator!=(const Coo<CrsMatType> &y) const {
-      return !(*this == y);
-    }  
-  };
-  
-}
-#endif
diff --git a/lib/kokkos/example/ichol/src/crs_matrix_base.hpp b/lib/kokkos/example/ichol/src/crs_matrix_base.hpp
deleted file mode 100644
index ad08b8757e83c68b8a9224a1d41c3087930a2eb4..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/crs_matrix_base.hpp
+++ /dev/null
@@ -1,598 +0,0 @@
-#pragma once
-#ifndef __CRS_MATRIX_BASE_HPP__
-#define __CRS_MATRIX_BASE_HPP__
-
-/// \file crs_matrix_base.hpp
-/// \brief CRS matrix base object interfaces to user provided input matrices.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-#include "coo.hpp"
-
-namespace Tacho { 
-
-  using namespace std;
-
-  template< typename , typename > class TaskView ;
-
-  template < typename CrsMatrixType >
-  struct GetCrsMatrixRowViewType {
-    typedef int type ;
-  };
-
-
-  template < typename CrsMatrixViewType , typename TaskFactoryType >
-  struct GetCrsMatrixRowViewType
-    < TaskView<CrsMatrixViewType,TaskFactoryType> >
-  {
-    typedef typename CrsMatrixViewType::row_view_type type ;
-  };
-
-  /// \class CrsMatrixBase
-  /// \breif CRS matrix base object using Kokkos view and subview
-  template<typename ValueType,
-           typename OrdinalType, 
-           typename SizeType = OrdinalType,
-           typename SpaceType = void,
-           typename MemoryTraits = void>
-  class CrsMatrixBase {
-  public:
-    typedef ValueType    value_type;
-    typedef OrdinalType  ordinal_type;
-    typedef SpaceType    space_type;
-    typedef SizeType     size_type;
-    typedef MemoryTraits memory_traits;
-
-    // 1D view, layout does not matter; no template parameters for that
-    typedef Kokkos::View<size_type*,   space_type,memory_traits> size_type_array;
-    typedef Kokkos::View<ordinal_type*,space_type,memory_traits> ordinal_type_array;
-    typedef Kokkos::View<value_type*,  space_type,memory_traits> value_type_array;
-
-    typedef typename size_type_array::value_type*    size_type_array_ptr;
-    typedef typename ordinal_type_array::value_type* ordinal_type_array_ptr;
-    typedef typename value_type_array::value_type*   value_type_array_ptr;
-
-    // range type
-    template<typename T> using range_type = pair<T,T>;
-
-    // external interface
-    typedef Coo<CrsMatrixBase> ijv_type;
-    
-    friend class CrsMatrixHelper;
-
-  private:
-
-    ordinal_type       _m;       //!< # of rows
-    ordinal_type       _n;       //!< # of cols
-    size_type          _nnz;     //!< # of nonzeros
-    size_type_array    _ap;      //!< pointers to column index and values
-    ordinal_type_array _aj;      //!< column index compressed format
-    value_type_array   _ax;      //!< values
-
-  public:
-
-    typedef typename GetCrsMatrixRowViewType< ValueType >::type row_view_type ;
-    typedef Kokkos::View<row_view_type*,space_type> row_view_type_array;
-
-    row_view_type_array _all_row_views ;
-
-  protected:
-
-    void createInternalArrays(const ordinal_type m, 
-                              const ordinal_type n,
-                              const size_type nnz) {
-      _m = m;
-      _n = n;
-      _nnz = nnz;
-
-      if (static_cast<ordinal_type>(_ap.dimension_0()) < m+1)
-        _ap = size_type_array("CrsMatrixBase::RowPtrArray", m+1);
-      
-      if (static_cast<size_type>(_aj.dimension_0()) < nnz)
-        _aj = ordinal_type_array("CrsMatrixBase::ColsArray", nnz);
-
-      if (static_cast<size_type>(_ax.dimension_0()) < nnz)
-        _ax = value_type_array("CrsMatrixBase::ValuesArray", nnz);
-    }
-
-    // Copy sparse matrix structure from coordinate format in 'mm'
-    // to CRS format in Views _ap, _aj, a_x.
-    void ijv2crs(const vector<ijv_type> &mm) {
-
-      ordinal_type ii = 0;
-      size_type jj = 0;
-      
-      ijv_type prev = mm[0];
-      _ap[ii++] = 0;
-      _aj[jj] = prev.Col();
-      _ax[jj] = prev.Val();
-      ++jj;
-      
-      for (typename vector<ijv_type>::const_iterator it=(mm.begin()+1);it<mm.end();++it) {
-        ijv_type aij = (*it);
-        
-        // row index
-        if (aij.Row() != prev.Row()) {
-          _ap[ii++] = jj; 
-        }
-        
-        if (aij == prev) {
-          --jj;
-          _aj[jj]  = aij.Col();
-          _ax[jj] += aij.Val();
-        } else {
-          _aj[jj] = aij.Col();
-          _ax[jj] = aij.Val();
-        }
-        ++jj;
-        
-        prev = aij;
-      }
-      
-      // add the last index to terminate the storage
-      _ap[ii++] = jj;
-      _nnz = jj;
-    }
-    
-  public:
-
-    KOKKOS_INLINE_FUNCTION
-    void setNumNonZeros() { 
-      if (_m) 
-        _nnz = _ap[_m];
-    }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type NumRows() const { return _m; }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type NumCols() const { return _n; }
-
-    KOKKOS_INLINE_FUNCTION
-    size_type NumNonZeros() const { return _nnz; }
-
-    KOKKOS_INLINE_FUNCTION
-    size_type_array_ptr RowPtr() const { return &_ap[0]; }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type_array_ptr ColPtr() const { return &_aj[0]; }
-
-    KOKKOS_INLINE_FUNCTION
-    value_type_array_ptr ValuePtr() const { return &_ax[0];}
-
-    KOKKOS_INLINE_FUNCTION
-    size_type RowPtr(const ordinal_type i) const { return _ap[i]; }
-    
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type_array_ptr ColsInRow(const ordinal_type i) const { return _aj.data() + _ap[i] ; }
-    
-    KOKKOS_INLINE_FUNCTION
-    value_type_array_ptr ValuesInRow(const ordinal_type i) const { return _ax.data() + _ap[i] ; }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type NumNonZerosInRow(const ordinal_type i) const { return (_ap[i+1] - _ap[i]); } 
-
-    KOKKOS_INLINE_FUNCTION
-    value_type& Value(const ordinal_type k) { return _ax[k]; }
-
-    KOKKOS_INLINE_FUNCTION
-    value_type Value(const ordinal_type k) const { return _ax[k]; }
-
-    /// \brief Default constructor.
-    KOKKOS_INLINE_FUNCTION
-    CrsMatrixBase() 
-      : _m(0),
-        _n(0),
-        _nnz(0),
-        _ap(),
-        _aj(),
-        _ax()
-    { }
-
-    /// \brief Constructor with label
-    CrsMatrixBase(const string & ) 
-      : _m(0),
-        _n(0),
-        _nnz(0),
-        _ap(),
-        _aj(),
-        _ax()
-    { }
-
-    /// \brief Copy constructor (shallow copy), for deep-copy use a method copy
-    template<typename VT,
-             typename OT,
-             typename ST,
-             typename SpT,
-             typename MT>
-    CrsMatrixBase(const CrsMatrixBase<VT,OT,ST,SpT,MT> &b) 
-      : _m(b._m),
-        _n(b._n),
-        _nnz(b._nnz),
-        _ap(b._ap), 
-        _aj(b._aj),
-        _ax(b._ax) 
-    { }
-
-    /// \brief Constructor to allocate internal data structures.
-    CrsMatrixBase(const string & ,
-                  const ordinal_type m, 
-                  const ordinal_type n, 
-                  const ordinal_type nnz) 
-      : _m(m),
-        _n(n),
-        _nnz(nnz),
-        _ap("CrsMatrixBase::RowPtrArray", m+1),
-        _aj("CrsMatrixBase::ColsArray", nnz),
-        _ax("CrsMatrixBase::ValuesArray", nnz)
-    { }
-
-    /// \brief Constructor to attach external arrays to the matrix.
-    CrsMatrixBase(const string &,
-                  const ordinal_type m, 
-                  const ordinal_type n, 
-                  const ordinal_type nnz,
-                  const size_type_array &ap,
-                  const ordinal_type_array &aj,
-                  const value_type_array &ax) 
-      : _m(m),
-        _n(n),
-        _nnz(nnz),
-        _ap(ap), 
-        _aj(aj),
-        _ax(ax) 
-    { }
-    
-  // Allow the copy function access to the input CrsMatrixBase
-  // private data.
-  template<typename, typename, typename, typename, typename>
-  friend class CrsMatrixBase ;
-
-  public:
-    /// \brief deep copy of matrix b, potentially different spaces
-    template< typename SpT >
-    int 
-    copy(const CrsMatrixBase<ValueType,OrdinalType,SizeType,SpT,MemoryTraits> &b) {
-
-      space_type::execution_space::fence();
-
-      createInternalArrays(b._m, b._n, b._nnz);
-
-      space_type::execution_space::fence();
-
-      const auto ap_range = range_type<ordinal_type>(0, min(_ap.dimension_0(), b._ap.dimension_0()));
-      const auto aj_range = range_type<size_type>   (0, min(_aj.dimension_0(), b._aj.dimension_0()));
-      const auto ax_range = range_type<size_type>   (0, min(_ax.dimension_0(), b._ax.dimension_0()));
-
-      Kokkos::deep_copy(Kokkos::subview(  _ap, ap_range), 
-                        Kokkos::subview(b._ap, ap_range));
-      Kokkos::deep_copy(Kokkos::subview(  _aj, aj_range),
-                        Kokkos::subview(b._aj, aj_range));
-
-      Kokkos::deep_copy(Kokkos::subview(  _ax, ax_range),
-                        Kokkos::subview(b._ax, ax_range));
-
-      space_type::execution_space::fence();
-
-      return 0;
-    }
-
-    /// \brief deep copy of lower/upper triangular of matrix b
-    int 
-    copy(const int uplo, 
-         const CrsMatrixBase &b) { 
-
-      createInternalArrays(b._m, b._n, b._nnz);
-
-      // assume that matrix b is sorted.
-      switch (uplo) {
-      case Uplo::Lower: {
-        _nnz = 0;
-        for (ordinal_type i=0;i<_m;++i) {
-          size_type jbegin = b._ap[i];
-          size_type jend   = b._ap[i+1];
-          _ap[i] = _nnz;
-          for (size_type j=jbegin;j<jend && (i >= b._aj[j]);++j,++_nnz) {
-            _aj[_nnz] = b._aj[j];
-            _ax[_nnz] = b._ax[j]; 
-          }
-        }
-        _ap[_m] = _nnz;
-        break;
-      }
-      case Uplo::Upper: {
-        _nnz = 0;
-        for (ordinal_type i=0;i<_m;++i) {
-          size_type j = b._ap[i];
-          size_type jend = b._ap[i+1];
-          _ap[i] = _nnz;
-          for ( ;j<jend && (i > b._aj[j]);++j) ;
-          for ( ;j<jend;++j,++_nnz) {
-            _aj[_nnz] = b._aj[j];
-            _ax[_nnz] = b._ax[j]; 
-          }
-        }
-        _ap[_m] = _nnz;
-        break;
-      }
-      }
-
-      return 0;
-    }
-
-    /// \brief deep copy of matrix b with given permutation vectors
-    template<typename VT,
-             typename OT,
-             typename ST,
-             typename SpT,
-             typename MT>
-    int
-    copy(const typename CrsMatrixBase<VT,OT,ST,SpT,MT>::ordinal_type_array &p,
-         const typename CrsMatrixBase<VT,OT,ST,SpT,MT>::ordinal_type_array &ip,
-         const CrsMatrixBase<VT,OT,ST,SpT,MT> &b) {
-
-      createInternalArrays(b._m, b._n, b._nnz);
-
-      // Question:: do I need to use Kokkos::vector ? 
-      //            in other words, where do we permute matrix in factoriztion ?
-      //            permuting a matrix is a kernel ? 
-      vector<ijv_type> tmp;
-
-      // any chance to use parallel_for ?
-      _nnz = 0;
-      for (ordinal_type i=0;i<_m;++i) {
-        ordinal_type ii = ip[i];
-
-        size_type jbegin = b._ap[ii];
-        size_type jend   = b._ap[ii+1];
-
-        _ap[i] = _nnz;
-        for (size_type j=jbegin;j<jend;++j) {
-          ordinal_type jj = p[b._aj[j]];
-          ijv_type aij(i, jj, b._ax[j]);
-          tmp.push_back(aij);
-        }
-
-        sort(tmp.begin(), tmp.end(), less<ijv_type>());
-        for (auto it=tmp.begin();it<tmp.end();++it) {
-          ijv_type aij = (*it);
-
-          _aj[_nnz] = aij.Col();
-          _ax[_nnz] = aij.Val();
-          ++_nnz;
-        }
-        tmp.clear();
-      }
-      _ap[_m] = _nnz;
-
-      return 0;
-    }
-
-    /// \brief add the matrix b into this non-zero entires
-    template<typename VT,
-             typename OT,
-             typename ST,
-             typename SpT,
-             typename MT>
-    int 
-    add(const CrsMatrixBase<VT,OT,ST,SpT,MT> &b) { 
-
-      const ordinal_type m = min(b._m, _m);
-      for (ordinal_type i=0;i<m;++i) {
-        const size_type jaend = _ap[i+1];
-        const size_type jbend = b._ap[i+1];
-
-        size_type ja = _ap[i];
-        size_type jb = b._ap[i];
-        
-        for ( ;jb<jbend;++jb) {
-          for ( ;(_aj[ja]<b._aj[jb] && ja<jaend);++ja);
-          _ax[ja] += (_aj[ja] == b._aj[jb])*b._ax[jb];
-        }
-      }
-
-      return 0;
-    }
-
-    int symmetrize(const int uplo, 
-                   const bool conjugate = false) {
-      vector<ijv_type> mm;
-      mm.reserve(_nnz*2);
-
-      for (ordinal_type i=0;i<_m;++i) {
-        const size_type jbegin = _ap[i];
-        const size_type jend   = _ap[i+1];
-        for (size_type jj=jbegin;jj<jend;++jj) {
-          const ordinal_type j = _aj[jj];
-          const value_type val = (conjugate ? conj(_ax[j]) : _ax[j]);
-          if        (uplo == Uplo::Lower && i > j) {
-            mm.push_back(ijv_type(i, j, val));
-            mm.push_back(ijv_type(j, i, val));
-          } else if (uplo == Uplo::Upper && i < j) {
-            mm.push_back(ijv_type(i, j, val));
-            mm.push_back(ijv_type(j, i, val));
-          } else if (i == j) {
-            mm.push_back(ijv_type(i, i, val));
-          }
-        }
-      }
-      sort(mm.begin(), mm.end(), less<ijv_type>());
-
-      createInternalArrays(_m, _n, mm.size());
-      
-      ijv2crs(mm);
-      
-      return 0;
-    }
-
-    int hermitianize(int uplo) {
-      return symmetrize(uplo, true);
-    }
-
-    ostream& showMe(ostream &os) const {
-      streamsize prec = os.precision();
-      os.precision(8);
-      os << scientific;
-
-      os << " -- CrsMatrixBase -- " << endl
-         << "    # of Rows          = " << _m << endl
-         << "    # of Cols          = " << _n << endl
-         << "    # of NonZeros      = " << _nnz << endl
-         << endl
-         << "    RowPtrArray length = " << _ap.dimension_0() << endl
-         << "    ColArray    length = " << _aj.dimension_0() << endl 
-         << "    ValueArray  length = " << _ax.dimension_0() << endl
-         << endl;
-      
-      const int w = 10;
-      if (_ap.size() && _aj.size() && _ax.size()) {
-        os << setw(w) <<  "Row" << "  " 
-           << setw(w) <<  "Col" << "  " 
-           << setw(w) <<  "Val" << endl;
-        for (ordinal_type i=0;i<_m;++i) {
-          size_type jbegin = _ap[i], jend = _ap[i+1];
-          for (size_type j=jbegin;j<jend;++j) {
-            value_type val = _ax[j];
-            os << setw(w) <<      i << "  " 
-               << setw(w) << _aj[j] << "  " 
-               << setw(w) <<    val << endl;
-          }
-        }
-      }
-
-      os.unsetf(ios::scientific);
-      os.precision(prec);
-
-      return os;
-    }
-
-    int importMatrixMarket(ifstream &file) {
-
-      vector<ijv_type> mm; 
-      const ordinal_type mm_base = 1; 
-
-      {
-        string header;
-        if (file.is_open()) {
-          getline(file, header);
-          while (file.good()) {
-            char c = file.peek();
-            if (c == '%' || c == '\n') {
-              file.ignore(256, '\n');
-            continue;
-            }
-            break;
-          }
-        } else {
-          ERROR(MSG_INVALID_INPUT(file));
-        }
-
-        // check the header
-        bool symmetry = (header.find("symmetric") != string::npos);
-
-        // read matrix specification
-        ordinal_type m, n;
-        size_type nnz;
-        
-        file >> m >> n >> nnz;
-        
-        mm.reserve(nnz*(symmetry ? 2 : 1));
-        for (size_type i=0;i<nnz;++i) {
-          ordinal_type row, col;
-          value_type val;
-          file >> row >> col >> val;
-          
-          row -= mm_base;
-          col -= mm_base;
-          
-          mm.push_back(ijv_type(row, col, val));
-          if (symmetry && row != col)
-            mm.push_back(ijv_type(col, row, val));
-        }
-        sort(mm.begin(), mm.end(), less<ijv_type>());
-      
-        // construct workspace and set variables
-        createInternalArrays(m, n, mm.size());
-      }
-      
-      // change mm to crs
-      ijv2crs(mm);
-      
-      return 0;
-    }
-    
-    int exportMatrixMarket(ofstream &file,
-                           const string comment,
-                           const int uplo = 0) {
-      streamsize prec = file.precision();
-      file.precision(8);
-      file << scientific;
-
-      file << "%%MatrixMarket matrix coordinate "
-           << (is_fundamental<value_type>::value ? "real " : "complex ")
-           << ((uplo == Uplo::Upper || uplo == Uplo::Lower) ? "symmetric " : "general ")
-           << endl;
-
-      file << comment << endl;
-      
-      // cnt nnz
-      size_type nnz = 0;
-      for (ordinal_type i=0;i<_m;++i) {
-        const size_type jbegin = _ap[i], jend = _ap[i+1];
-        for (size_type j=jbegin;j<jend;++j) {
-          if (uplo == Uplo::Upper && i <= _aj[j]) ++nnz;
-          if (uplo == Uplo::Lower && i >= _aj[j]) ++nnz;
-          if (!uplo) ++nnz;
-        }
-      }
-      file << _m << " " << _n << " " << nnz << endl;
-
-      const int w = 10;
-      for (ordinal_type i=0;i<_m;++i) {
-        const size_type jbegin = _ap[i], jend = _ap[i+1];
-        for (size_type j=jbegin;j<jend;++j) {
-          bool flag = false;
-          if (uplo == Uplo::Upper && i <= _aj[j]) flag = true;
-          if (uplo == Uplo::Lower && i >= _aj[j]) flag = true;
-          if (!uplo) flag = true;
-          if (flag) {
-            value_type val = _ax[j];
-            file << setw(w) << (     i+1) << "  " 
-                 << setw(w) << (_aj[j]+1) << "  " 
-                 << setw(w) <<    val << endl;
-          }
-        }
-      }
-
-      file.unsetf(ios::scientific);
-      file.precision(prec);
-
-      return 0;
-    }
-
-    //----------------------------------------------------------------------
-
-    int convertGraph(size_type_array rptr,
-                     ordinal_type_array cidx) const {
-      ordinal_type ii = 0;
-      size_type jj = 0;
-
-      for (ordinal_type i=0;i<_m;++i) {
-        size_type jbegin = _ap[i], jend = _ap[i+1];
-        rptr[ii++] = jj;
-        for (size_type j=jbegin;j<jend;++j)
-          if (i != _aj[j])
-            cidx[jj++] = _aj[j];
-      }
-      rptr[ii] = jj;
-
-      return 0;
-    }
-
-    //----------------------------------------------------------------------
-
-  };
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp b/lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp
deleted file mode 100644
index e1ff0f3a9fd403ae51d68f77358409e1e3cd5cca..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#pragma once
-#ifndef __CRS_MATRIX_BASE_IMPL_HPP__
-#define __CRS_MATRIX_BASE_IMPL_HPP__
-
-/// \file crs_matrix_base_impl.hpp
-/// \brief Implementation of external interfaces to CrsMatrixBase
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho { 
-
-  using namespace std;
-
-  template<typename VT,
-           typename OT,
-           typename ST,
-           typename SpT,
-           typename MT>
-  inline int 
-  CrsMatrixBase<VT,OT,ST,SpT,MT>::importMatrixMarket(ifstream &file) {
-    // skip initial title comments
-    {
-      ordinal_type m, n;
-      size_type nnz;
-          
-      while (file.good()) {
-        char c = file.peek();
-        if (c == '%' || c == '\n') {
-          file.ignore(256, '\n');
-          continue;
-        }
-        break;
-      }
-          
-      // read matrix specification
-      file >> m >> n >> nnz;
-          
-      // construct workspace and set variables
-      createInternalArrays(m, n, nnz);
-    }
-
-    // read the coordinate format (matrix-market)
-    vector<ijv_type> mm; 
-    mm.reserve(_nnz);
-    {
-      // matrix market use one base index
-      const ordinal_type mm_base = 1; 
-
-      for (size_type i=0;i<_nnz;++i) {
-        ijv_type aij;
-        file >> aij.Row() >> aij.Col() >> aij.Val();
-
-        // one base to zero base
-        aij.Row() -= mm_base;
-        aij.Col() -= mm_base;
-            
-        mm.push_back(aij);
-      }
-      sort(mm.begin(), mm.end(), less<ijv_type>());
-    }
-
-    // change mm to crs
-    {
-      ordinal_type ii = 0;
-      size_type jj = 0;
-
-      ijv_type prev = mm[0];
-      _ap[ii++] = 0;
-      _aj[jj] = prev.Col();
-      _ax[jj] = prev.Val();
-      ++jj;
-
-      for (typename vector<ijv_type>::iterator it=(mm.begin()+1);it<mm.end();++it) {
-        ijv_type aij = (*it);
-        
-        // row index
-        if (aij.Row() != prev.Row()) {
-          _ap[ii++] = jj; 
-        }
-            
-        if (aij == prev) {
-          --jj;
-          _aj[jj]  = aij.Col();
-          _ax[jj] += aij.Val();
-        } else {
-          _aj[jj] = aij.Col();
-          _ax[jj] = aij.Val();
-        }
-        ++jj;
-          
-        prev = aij;
-      }
-          
-      // add the last index to terminate the storage
-      _ap[ii++] = jj;
-      _nnz = jj;
-    }
-      
-    return 0;
-  }
-  
-}
-
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/crs_matrix_helper.hpp b/lib/kokkos/example/ichol/src/crs_matrix_helper.hpp
deleted file mode 100644
index 5b80e77935fcb968bff8f05e9876a10299a82182..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/crs_matrix_helper.hpp
+++ /dev/null
@@ -1,71 +0,0 @@
-#pragma once
-#ifndef __CRS_MATRIX_HELPER_HPP__
-#define __CRS_MATRIX_HELPER_HPP__
-
-/// \file crs_matrix_helper.hpp
-/// \brief This file includes utility functions to convert between flat and hierarchical matrices.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)  
-
-#include "util.hpp"
-
-namespace Tacho { 
-
-  using namespace std;
-
-  class CrsMatrixHelper {
-  public:
-
-    template< typename CrsHierBase >
-    static int fillRowViewArray( CrsHierBase & HU );
-
-    template<typename CrsFlatBase>
-    static int
-    filterZeros(CrsFlatBase &flat);
-    
-    /// \brief Transform a scalar flat matrix to hierarchical matrix of matrices 1x1; testing only.
-    template<typename CrsFlatBase,
-             typename CrsHierBase>
-    static int
-    flat2hier(CrsFlatBase &flat, 
-              CrsHierBase &hier);
-
-    /// \brief Transform a scalar flat matrix to upper hierarchical matrix given scotch info. 
-    template<typename CrsFlatBase,
-             typename CrsHierBase,
-             typename HostOrdinalTypeArray >
-    static int
-    flat2hier(int uplo, 
-              CrsFlatBase &flat, 
-              CrsHierBase &hier,
-              const typename CrsHierBase::ordinal_type       nblks,
-              const HostOrdinalTypeArray range,
-              const HostOrdinalTypeArray tree);
-
-    /// \brief Transform a scalar flat matrix to upper hierarchical matrix given scotch info. 
-    template<typename CrsFlatBase,
-             typename CrsHierBase,
-             typename HostOrdinalTypeArray >
-    static int
-    flat2hier_upper(CrsFlatBase &flat, 
-                    CrsHierBase &hier,
-                    const typename CrsHierBase::ordinal_type       nblks,
-                    const HostOrdinalTypeArray range,
-                    const HostOrdinalTypeArray tree);
-
-    /// \brief Transform a scalar flat matrix to lower hierarchical matrix given scotch info. 
-    template<typename CrsFlatBase,
-             typename CrsHierBase,
-             typename HostOrdinalTypeArray >
-    static int
-    flat2hier_lower(CrsFlatBase &flat, 
-                    CrsHierBase &hier,
-                    const typename CrsHierBase::ordinal_type       nblks,
-                    const HostOrdinalTypeArray range,
-                    const HostOrdinalTypeArray tree);
-  };
-
-}
-
-#include "crs_matrix_helper_impl.hpp"
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp b/lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp
deleted file mode 100644
index 0fc4c9f1b83c0e48d3e42eb61e8e1cea12b1c187..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp
+++ /dev/null
@@ -1,364 +0,0 @@
-
-#ifndef __CRS_MATRIX_HELPER_IMPL_HPP__
-#define __CRS_MATRIX_HELPER_IMPL_HPP__
-
-/// \file crs_matrix_helper_impl.hpp
-/// \brief This file includes utility functions to convert between flat and hierarchical matrices.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-
-namespace Tacho {
-
-  using namespace std;
-
-  template< typename CrsHierBase >
-  struct FunctorFillRowViewArray {
-
-    typedef typename CrsHierBase::ordinal_type         ordinal_type ;
-    typedef typename CrsHierBase::row_view_type_array  row_view_type_array ;
-    typedef typename CrsHierBase::value_type_array     ax_type ;
-
-    typedef ordinal_type value_type ;
-
-    row_view_type_array _all_row_views ;
-    ax_type             _ax ;
-
-    FunctorFillRowViewArray( const row_view_type_array & arg_all_row_views
-                           , const ax_type             & arg_ax )
-      : _all_row_views( arg_all_row_views )
-      , _ax( arg_ax )
-      {}
-
-    KOKKOS_INLINE_FUNCTION
-    void operator()( ordinal_type k , ordinal_type & value ) const
-      { value += _ax(k).NumRows(); }
-
-    KOKKOS_INLINE_FUNCTION
-    void operator()( ordinal_type k , ordinal_type & value , bool final ) const
-      {
-        if ( final ) {
-          const int begin = value ;
-          const int end   = begin + _ax(k).NumRows();
-
-          auto sub = Kokkos::subview( _all_row_views, Kokkos::pair<int,int>(begin,end) );
-
-          _ax(k).setRowViewArray( sub );
-        }
-
-        value += _ax(k).NumRows();
-      }
-  };
-
-  template< typename CrsHierBase >
-  int CrsMatrixHelper::fillRowViewArray( CrsHierBase & device_HU )
-  {
-    typedef typename CrsHierBase::row_view_type_array row_view_type_array ;
-    typedef typename CrsHierBase::space_type          space_type ;
-
-    ordinal_type total_row_view_count = 0 ;
-
-    Kokkos::RangePolicy< space_type >
-      range_policy( 0 , device_HU.NumNonZeros() );
-
-    space_type::fence();
-
-    {
-      FunctorFillRowViewArray< CrsHierBase >
-         functor( row_view_type_array() , device_HU._ax );
-
-
-      Kokkos::parallel_reduce( range_policy , functor , total_row_view_count );
-    }
-
-    device_HU._all_row_views =
-      row_view_type_array("RowViews",total_row_view_count);
-
-    space_type::fence();
-
-    {
-      FunctorFillRowViewArray< CrsHierBase >
-         functor( device_HU._all_row_views , device_HU._ax );
-
-      Kokkos::parallel_scan( range_policy , functor );
-    }
-
-    space_type::fence();
-
-    return 0 ;
-  }
-  
-  template<typename CrsFlatBase>
-  int
-  CrsMatrixHelper::filterZeros(CrsFlatBase &flat) {
-    typedef typename CrsFlatBase::ordinal_type           ordinal_type;
-    typedef typename CrsFlatBase::size_type              size_type;
-    typedef typename CrsFlatBase::value_type             value_type;
-    
-    typedef typename CrsFlatBase::ordinal_type_array_ptr ordinal_type_array_ptr;
-    typedef typename CrsFlatBase::value_type_array_ptr   value_type_array_ptr;
-    
-    size_type nz = 0;
-    const value_type zero(0);
-    
-    for (ordinal_type k=0;k<flat.NumNonZeros();++k) 
-      nz += (flat.Value(k) == zero) ;
-    
-    if (nz) {
-      CrsFlatBase resized(flat.Label() + "::ZeroFiltered", 
-                          flat.NumRows(),
-                          flat.NumCols(),
-                          flat.NumNonZeros() - nz);
-      
-      ordinal_type_array_ptr rows = resized.RowPtr(); rows[0] = 0;
-      ordinal_type_array_ptr cols = resized.ColPtr();
-      value_type_array_ptr vals = resized.ValuePtr();    
-      
-      size_type nnz = 0;
-      for (ordinal_type i=0;i<flat.NumRows();++i) {
-        const ordinal_type nnz_in_row = flat.NumNonZerosInRow(i);
-        const ordinal_type_array_ptr cols_in_row = flat.ColsInRow(i);
-        const value_type_array_ptr vals_in_row = flat.ValuesInRow(i);
-        
-        for (ordinal_type j=0;j<nnz_in_row;++j) {
-          if (vals_in_row[j] != zero) {
-            cols[nnz] = cols_in_row[j];
-            vals[nnz] = vals_in_row[j];
-            ++nnz;
-          }
-        }
-        rows[i+1] = nnz;
-      }
-      flat = resized;
-      resized.setNumNonZeros();
-    }
-
-    return 0;
-  }
-
-
-  template<typename CrsFlatBase,
-           typename CrsHierBase>
-  int
-  CrsMatrixHelper::flat2hier(CrsFlatBase &flat,
-                             CrsHierBase &hier) {
-    typedef typename CrsHierBase::ordinal_type           ordinal_type;
-    typedef typename CrsHierBase::size_type              size_type;
-    typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr;
-
-    size_type nnz = 0;
-
-    hier.createInternalArrays(flat.NumRows(), flat.NumCols(), flat.NumNonZeros());
-
-    for (ordinal_type i=0;i<flat.NumRows();++i) {
-      ordinal_type jsize = flat.NumNonZerosInRow(i);
-
-      hier._ap[i] = nnz;
-      ordinal_type_array_ptr ci = flat.ColsInRow(i);
-      for (ordinal_type j=0;j<jsize;++j,++nnz) {
-        hier._aj[nnz] = ci[j];
-        hier._ax[nnz].setView( flat,     i, 1,
-                              /**/   ci[j], 1);
-      }
-    }
-
-    hier._ap[flat.NumRows()] = nnz;
-    hier._nnz = nnz;
-
-    return 0;
-  }
-
-  template< typename CrsFlatBase ,
-            typename CrsHierBase ,
-            typename HostOrdinalTypeArray >
-  int
-  CrsMatrixHelper::flat2hier(int uplo,
-                             CrsFlatBase &flat,
-                             CrsHierBase &hier,
-                             const typename CrsHierBase::ordinal_type       nblks,
-                             const HostOrdinalTypeArray range ,
-                             const HostOrdinalTypeArray tree) {
-    switch(uplo) {
-    case Uplo::Upper: return flat2hier_upper(flat, hier, nblks, range, tree);
-    case Uplo::Lower: return flat2hier_lower(flat, hier, nblks, range, tree);
-    }
-    return -1;
-  }
-
-  template<typename CrsFlatBase,
-           typename CrsHierBase,
-           typename HostOrdinalTypeArray >
-  int
-  CrsMatrixHelper::flat2hier_upper(CrsFlatBase & device_flat, 
-                                   CrsHierBase & device_hier,
-                                   const typename CrsHierBase::ordinal_type       nblks,
-                                   const HostOrdinalTypeArray range,
-                                   const HostOrdinalTypeArray tree) {
-    typedef typename CrsHierBase::ordinal_type            ordinal_type;
-    typedef typename CrsHierBase::size_type               size_type;
-    
-    //typedef typename CrsHierBase::ordinal_type_array     ordinal_type_array;
-    //typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr;
-    //typedef typename CrsHierBase::value_type_array_ptr   value_type_array_ptr;
-    
-    size_type nnz = 0;
-    
-    // count nnz and nnz in rows for the upper triangular hier matrix
-    for (ordinal_type i=0;i<nblks;++i) 
-      for (ordinal_type j=i;j != -1;++nnz,j=tree[j]) ;
-    
-    // create upper triangular block matrix
-    device_hier.createInternalArrays(nblks, nblks, nnz);    
-
-    typename CrsHierBase::size_type_array::HostMirror
-      host_ap = Kokkos::create_mirror_view( device_hier._ap );
-
-    typename CrsHierBase::ordinal_type_array::HostMirror
-      host_aj = Kokkos::create_mirror_view( device_hier._aj );
-
-    typename CrsHierBase::value_type_array::HostMirror
-      host_ax = Kokkos::create_mirror_view( device_hier._ax );
-
-    nnz = 0;
-    for (ordinal_type i=0;i<nblks;++i) {
-      host_ap[i] = nnz;
-      for (ordinal_type j=i;j != -1;++nnz,j=tree[j]) {
-        host_aj[nnz] = j;
-        host_ax[nnz].setView( device_flat, range[i], (range[i+1] - range[i]),
-                             /**/          range[j], (range[j+1] - range[j]));
-
-        // this checking might more expensive 
-        // and attempts to access device memory from the host
-        // if (!host_ax[nnz].countNumNonZeros())
-        //  --nnz;
-      }
-    }
-    
-    host_ap[nblks] = nnz;
-
-    Kokkos::deep_copy( device_hier._ap , host_ap );
-    Kokkos::deep_copy( device_hier._aj , host_aj );
-    Kokkos::deep_copy( device_hier._ax , host_ax );
-
-    device_hier._nnz = nnz;
-
-    return 0;
-  }
-
-  // template<typename CrsFlatBase,
-  //          typename CrsHierBase>
-  // int
-  // CrsMatrixHelper::flat2hier_upper(CrsFlatBase &flat,
-  //                                  CrsHierBase &hier,
-  //                                  const typename CrsHierBase::ordinal_type       nblks,
-  //                                  const typename CrsHierBase::ordinal_type_array range,
-  //                                  const typename CrsHierBase::ordinal_type_array tree) {
-  //   typedef typename CrsHierBase::ordinal_type            ordinal_type;
-  //   typedef typename CrsHierBase::size_type               size_type;
-
-  //   typedef typename CrsHierBase::ordinal_type_array     ordinal_type_array;
-  //   //typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr;
-  //   //typedef typename CrsHierBase::value_type_array_ptr   value_type_array_ptr;
-
-  //   ordinal_type_array sibling("CrsMatrixHelper::flat2hier_upper::sibling", nblks);
-
-  //   // check the end of adjacent siblings (if not adjacent, they are separators)
-  //   ordinal_type p = tree[0];
-  //   for (ordinal_type i=1;i<nblks;++i) {
-  //     const ordinal_type j = tree[i];
-  //     if (p != j) {
-  //       p = j;
-  //       sibling[i-1] = -1;
-  //     }
-  //   }
-  //   sibling[nblks-1] = -1;
-
-  //   size_type nnz = 0;
-
-  //   // count nnz and nnz in rows for the upper triangular hier matrix
-  //   for (ordinal_type i=0;i<nblks;++i) {                  // search for all rows
-  //     for (ordinal_type j=i;j != -1;j=tree[j]) {          // move up
-  //       ordinal_type k=j;
-  //       do {
-  //         ++nnz;
-  //       } while (sibling[k++] != -1);
-  //     }
-  //   }
-
-  //   // create upper triangular block matrix
-  //   hier.createInternalArrays(nblks, nblks, nnz);
-
-  //   nnz = 0;
-  //   for (ordinal_type i=0;i<nblks;++i) {
-  //     hier._ap[i] = nnz;
-  //     for (ordinal_type j=i;j != -1;j=tree[j]) {
-  //       ordinal_type k=j;
-  //       do {
-  //         hier._aj[nnz] = k;
-  //         hier._ax[nnz].setView( flat, range[i], (range[i+1] - range[i]),
-  //                               /**/   range[k], (range[k+1] - range[k]));
-
-  //         // this checking might more expensive
-  //         if (hier._ax[nnz].hasNumNonZeros())
-  //           ++nnz;
-  //       } while (sibling[k++] != -1);
-  //     }
-  //   }
-  //   hier._ap[nblks] = nnz;
-  //   hier._nnz = nnz;
-
-  //   return 0;
-  // }
-
-  template<typename CrsFlatBase,
-           typename CrsHierBase,
-           typename HostOrdinalTypeArray >
-  int
-  CrsMatrixHelper::flat2hier_lower(CrsFlatBase &flat,
-                                   CrsHierBase &hier,
-                                   const typename CrsHierBase::ordinal_type       nblks,
-                                   const HostOrdinalTypeArray range,
-                                   const HostOrdinalTypeArray tree) {
-    ERROR(MSG_NOT_YET_IMPLEMENTED);
-
-    // typedef typename CrsHierBase::ordinal_type           ordinal_type;
-    // typedef typename CrsHierBase::size_type              size_type;
-
-    // typedef typename CrsHierBase::ordinal_type_array     ordinal_type_array;
-    // //typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr;
-    // //typedef typename CrsHierBase::value_type_array_ptr   value_type_array_ptr;
-
-    // ordinal_type_array tmp = ordinal_type_array("flat2hier:tmp", nblks+1);
-    // size_type nnz = 0;
-
-    // // count nnz and nnz in rows for lower triangular matrix
-    // for (ordinal_type i=0;i<nblks;++i)
-    //   for (ordinal_type j=i;j != -1;++nnz) {
-    //     ++tmp[j];
-    //     j = tree[j];
-    //   }
-
-    // // count nnz and nnz in rows for lower triangular matrix
-    // hier.createInternalArrays(nblks, nblks, nnz);
-    // for (ordinal_type i=1;i<(nblks+1);++i)
-    //   hier._ap[i] = hier._ap[i-1] + tmp[i-1];
-
-    // for (ordinal_type i=0;i<(nblks+1);++i)
-    //   tmp[i] = hier._ap[i];
-
-    // for (ordinal_type i=0;i<nblks;++i)
-    //   for (ordinal_type j=i;j != -1;j=tree[j]) {
-    //     hier._aj[tmp[j]] = i;
-    //     hier._ax[tmp[j]].setView( flat, range[j], (range[j+1] - range[j]),
-    //                              /**/   range[i], (range[i+1] - range[i]));
-    //     ++tmp[j];
-    //   }
-
-    return 0;
-  }
-
-}
-
-
-#endif
-
diff --git a/lib/kokkos/example/ichol/src/crs_matrix_view.hpp b/lib/kokkos/example/ichol/src/crs_matrix_view.hpp
deleted file mode 100644
index 2a55e6fac9b64eca3eade412a1511913baafab85..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/crs_matrix_view.hpp
+++ /dev/null
@@ -1,226 +0,0 @@
-#pragma once
-#ifndef __CRS_MATRIX_VIEW_HPP__
-#define __CRS_MATRIX_VIEW_HPP__
-
-/// \file crs_matrix_view.hpp
-/// \brief CRS matrix view object creates 2D view to setup a computing region.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-
-namespace Tacho { 
-
-  using namespace std;
-
-  template<typename CrsMatBaseType>  
-  class CrsRowView;
-
-  template<typename CrsMatBaseType>
-  class CrsMatrixView {
-  public:
-    typedef typename CrsMatBaseType::space_type    space_type;
-    
-    typedef typename CrsMatBaseType::value_type    value_type;
-    typedef typename CrsMatBaseType::ordinal_type  ordinal_type;
-    typedef typename CrsMatBaseType::size_type     size_type;
-
-    typedef CrsMatBaseType             mat_base_type;
-    typedef CrsRowView<mat_base_type>  row_view_type;
-
-    // be careful this use rcp and atomic operation
-    // - use setView to create a view if _rows is not necessary
-    // - copy constructor and assignment operator will do soft copy of the object
-    typedef Kokkos::View<row_view_type*,space_type,Kokkos::MemoryUnmanaged> row_view_type_array;
-    
-  private:
-    CrsMatBaseType _base;    // shallow copy of the base object
-    ordinal_type  _offm;     // offset in rows
-    ordinal_type  _offn;     // offset in cols
-    ordinal_type  _m;        // # of rows
-    ordinal_type  _n;        // # of cols
-
-    row_view_type_array _rows;
-    
-  public:
-
-    KOKKOS_INLINE_FUNCTION
-    void setRowViewArray( const row_view_type_array & arg_rows )
-      {
-        _rows = arg_rows ;
-
-        for (ordinal_type i=0;i<_m;++i) {
-          _rows[i].setView(*this, i);
-        }
-      }
-
-    KOKKOS_INLINE_FUNCTION
-    row_view_type& RowView(const ordinal_type i) const { return _rows[i]; }
-
-    KOKKOS_INLINE_FUNCTION
-    void setView(const CrsMatBaseType &base,
-                 const ordinal_type offm, const ordinal_type m,
-                 const ordinal_type offn, const ordinal_type n) {
-      _base = base;
-
-      _offm = offm; _m = m;
-      _offn = offn; _n = n;
-    }
-
-    KOKKOS_INLINE_FUNCTION
-    const CrsMatBaseType & BaseObject() const { return _base; }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type  OffsetRows() const { return _offm; }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type  OffsetCols() const { return _offn; }
-
-    KOKKOS_INLINE_FUNCTION    
-    ordinal_type  NumRows() const { return _m; }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type  NumCols() const { return _n; }
-
-    KOKKOS_INLINE_FUNCTION
-    bool hasNumNonZeros() const { 
-      const ordinal_type m = NumRows();
-      for (ordinal_type i=0;i<m;++i) {
-        row_view_type row;
-        row.setView(*this, i);
-        if (row.NumNonZeros()) return true;
-      }
-      return false;
-    }
-
-    inline
-    size_type countNumNonZeros() const { 
-      size_type nnz = 0;
-      const ordinal_type m = NumRows();
-      for (ordinal_type i=0;i<m;++i) {
-        row_view_type row;
-        row.setView(*this, i);
-        nnz += row.NumNonZeros();
-      }
-      return nnz; 
-    }
-
-    KOKKOS_INLINE_FUNCTION
-    CrsMatrixView()
-      : _base(),
-        _offm(0),
-        _offn(0),
-        _m(0),
-        _n(0),
-        _rows()
-    { } 
-
-    KOKKOS_INLINE_FUNCTION
-    CrsMatrixView(const CrsMatrixView &b)
-      : _base(b._base),
-        _offm(b._offm),
-        _offn(b._offn),
-        _m(b._m),
-        _n(b._n),
-        _rows(b._rows)
-    { } 
-
-    KOKKOS_INLINE_FUNCTION
-    CrsMatrixView(const CrsMatBaseType & b)
-      : _base(b),
-        _offm(0),
-        _offn(0),
-        _m(b.NumRows()),
-        _n(b.NumCols()),
-        _rows()
-    { } 
-
-    CrsMatrixView(const CrsMatBaseType & b,
-                  const ordinal_type offm, const ordinal_type m,
-                  const ordinal_type offn, const ordinal_type n) 
-      : _base(b),
-        _offm(offm),
-        _offn(offn),
-        _m(m),
-        _n(n),
-        _rows()
-    { } 
-
-    ostream& showMe(ostream &os) const {
-      const int w = 4;
-      os << "CrsMatrixView, "
-         << " Offs ( " << setw(w) << _offm << ", " << setw(w) << _offn << " ); "
-         << " Dims ( " << setw(w) << _m    << ", " << setw(w) << _n    << " ); "
-         << " NumNonZeros = " << countNumNonZeros() << ";";
-
-      return os;
-    }
-
-  };
-}
-
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#if ! KOKKOS_USING_EXP_VIEW
-
-namespace Kokkos {
-  namespace Impl {
-    
-    //  The Kokkos::View allocation will by default assign each allocated datum to zero.
-    //  This is not the required initialization behavior when
-    //  Tacho::CrsRowView and Tacho::CrsMatrixView
-    //  are used within a Kokkos::View.
-    //  Create a partial specialization of the Kokkos::Impl::AViewDefaultConstruct
-    //  to replace the assignment initialization with placement new initialization.
-    //
-    //  This work-around is necessary until a TBD design refactorization of Kokkos::View.
-    
-    template< class ExecSpace , typename T >
-    struct ViewDefaultConstruct< ExecSpace , Tacho::CrsRowView<T> , true >
-    {
-      typedef Tacho::CrsRowView<T> type ;
-      type * const m_ptr ;
-      
-      KOKKOS_FORCEINLINE_FUNCTION
-      void operator()( const typename ExecSpace::size_type& i ) const
-      { new(m_ptr+i) type(); }
-      
-      ViewDefaultConstruct( type * pointer , size_t capacity )
-        : m_ptr( pointer )
-      {
-        Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
-        parallel_for( range , *this );
-        ExecSpace::fence();
-      }
-    };
-    
-    template< class ExecSpace , typename T >
-    struct ViewDefaultConstruct< ExecSpace , Tacho::CrsMatrixView<T> , true >
-    {
-      typedef Tacho::CrsMatrixView<T> type ;
-      type * const m_ptr ;
-      
-      KOKKOS_FORCEINLINE_FUNCTION
-      void operator()( const typename ExecSpace::size_type& i ) const
-      { new(m_ptr+i) type(); }
-      
-      ViewDefaultConstruct( type * pointer , size_t capacity )
-        : m_ptr( pointer )
-      {
-        Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
-        parallel_for( range , *this );
-        ExecSpace::fence();
-      }
-    };
-
-  } // namespace Impl
-} // namespace Kokkos
-
-#endif /* #if ! KOKKOS_USING_EXP_VIEW */
-
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/crs_row_view.hpp b/lib/kokkos/example/ichol/src/crs_row_view.hpp
deleted file mode 100644
index 8556bcb9e637dd64afdf92f4ef6b526a14562d09..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/crs_row_view.hpp
+++ /dev/null
@@ -1,185 +0,0 @@
-#pragma once
-#ifndef __CRS_ROW_VIEW_HPP__
-#define __CRS_ROW_VIEW_HPP__
-
-/// \file crs_row_view.hpp
-/// \brief A view to a row extracted from CrsMatrixView.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho { 
-
-  using namespace std;
-
-  /// \class CrsRowView
-  template<typename CrsMatBaseType>
-  class CrsRowView {
-  public:
-    typedef typename CrsMatBaseType::ordinal_type           ordinal_type;
-    typedef typename CrsMatBaseType::value_type             value_type;
-    typedef typename CrsMatBaseType::ordinal_type_array_ptr ordinal_type_array_ptr;
-    typedef typename CrsMatBaseType::value_type_array_ptr   value_type_array_ptr;
-    
-  private:
-    // row info
-    ordinal_type _offn, _n;    
-
-    // this assumes a contiguous memory buffer
-    ordinal_type_array_ptr _aj, _ajn; // column index compressed format in row
-    value_type_array_ptr   _ax;                // values 
-
-    static KOKKOS_INLINE_FUNCTION
-    typename CrsMatBaseType::ordinal_type_array_ptr
-    lower_bound( typename CrsMatBaseType::ordinal_type_array_ptr begin ,
-                 typename CrsMatBaseType::ordinal_type_array_ptr const end ,
-                 typename CrsMatBaseType::ordinal_type           const val )
-      {
-         typename CrsMatBaseType::ordinal_type_array_ptr it = begin ;
-         int count = end - begin ;
-         int step = 0 ;
-         while (count>0) {
-           it = begin ;
-           it += ( step = (count >> 1) );
-           if (*it<val) {
-             begin=++it;
-             count-=step+1;
-           }
-           else { count=step; }
-         }
-         return begin;
-      }
-
-  public:
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type OffsetCols() const { return _offn; }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type NumCols() const { return _n; }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type NumNonZeros() const { return _ajn - _aj; } 
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type Col(const ordinal_type j) const { return _aj[j] - _offn; }
-
-    KOKKOS_INLINE_FUNCTION
-    value_type& Value(const ordinal_type j) { return _ax[j]; }
-
-    KOKKOS_INLINE_FUNCTION
-    value_type Value(const ordinal_type j) const { return _ax[j]; }
-    
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type Index(const ordinal_type col ) const {
-      const ordinal_type loc = _offn + col ;
-      // binary search
-      ordinal_type_array_ptr aj = CrsRowView::lower_bound(_aj, _ajn, loc);
-
-      // if found, return index for the location, 
-      // otherwise return -1 (not found), -2 (end of array)
-      return (aj < _ajn ? (*aj == loc ? aj - _aj : -1) : -2);
-    }
-
-    KOKKOS_INLINE_FUNCTION
-    ordinal_type Index(const ordinal_type col,
-                       const ordinal_type prev ) const {
-      const ordinal_type loc = _offn + col;
-      ordinal_type_array_ptr aj = _aj + prev;
-
-      // binary search
-      // aj = lower_bound(aj, _ajn, loc);
-
-      // linear search from prev: this gains about 45 % faster
-      for ( ;aj < _ajn && *aj<loc; ++aj); 
-
-      // if found, return index for the location, 
-      // otherwise return -1 (not found), -2 (end of array)
-      return (aj < _ajn ? (*aj == loc ? aj - _aj : -1) : -2);
-    }
-
-    KOKKOS_INLINE_FUNCTION
-    value_type ValueAtColumn(const ordinal_type col) const {
-      const ordinal_type j = Index(col);
-      return (j < 0 ? value_type(0) : _ax[j]);
-    }
-
-    KOKKOS_INLINE_FUNCTION
-    CrsRowView()
-      : _offn(0),
-        _n(0),
-        _aj(),
-        _ajn(),
-        _ax() 
-    { }
-
-
-    KOKKOS_INLINE_FUNCTION
-    CrsRowView(const ordinal_type           offn,
-               const ordinal_type           n,
-               const ordinal_type_array_ptr aj,
-               const ordinal_type_array_ptr ajn,
-               const value_type_array_ptr   ax) 
-      : _offn(offn),
-        _n(n),
-        _aj(aj),
-        _ajn(ajn),
-        _ax(ax) 
-    { }
-
-    KOKKOS_INLINE_FUNCTION
-    CrsRowView(const CrsMatrixView<CrsMatBaseType> &A, 
-               const ordinal_type i) {
-      this->setView(A, i);
-    }
-
-    KOKKOS_INLINE_FUNCTION
-    CrsRowView(const CrsMatBaseType &A, 
-               const ordinal_type i) {
-      this->setView(A, i);
-    }
-
-    KOKKOS_INLINE_FUNCTION
-    void setView(const CrsMatrixView<CrsMatBaseType> &A, 
-                 const ordinal_type i) {
-      _offn = A.OffsetCols();
-      _n    = A.NumCols();
-
-      const ordinal_type ii = A.OffsetRows() + i;
-
-      const typename CrsMatBaseType::ordinal_type_array_ptr cols = A.BaseObject().ColsInRow(ii);
-      const typename CrsMatBaseType::ordinal_type_array_ptr next = A.BaseObject().ColsInRow(ii+1);
-      const typename CrsMatBaseType::value_type_array_ptr   vals = A.BaseObject().ValuesInRow(ii);
-
-      // [cols..next) is sorted so a log(N) search could performed
-      _aj  = CrsRowView::lower_bound(cols, next, _offn);
-      _ajn = CrsRowView::lower_bound(_aj,  next, _offn+_n);
-
-      _ax  = &vals[_aj - cols];
-    }
-
-    KOKKOS_INLINE_FUNCTION
-    void setView(const CrsMatBaseType &A, 
-                 const ordinal_type i) {
-      _offn = 0;
-      _n    = A.NumCols();
-      _aj   = A.ColsInRow(i);
-      _ajn  = A.ColsInRow(i+1);
-      _ax   = A.ValuesInRow(i);
-    }
-
-    ostream& showMe(ostream &os) const {                                                
-      const ordinal_type nnz = NumNonZeros();
-      const ordinal_type offset = OffsetCols();
-      os << "  offset = " << offset
-         << ", nnz = " << nnz
-         << endl; 
-      for (ordinal_type j=0;j<nnz;++j) {
-        const value_type val = _ax[j];
-        os << "(" << Col(j) << ", "
-           << val << ")"
-           << endl;
-      }
-      return os;
-    }
-  };
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/dot.hpp b/lib/kokkos/example/ichol/src/dot.hpp
deleted file mode 100644
index acf927e0689759873b441012e187131a54055f88..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/dot.hpp
+++ /dev/null
@@ -1,74 +0,0 @@
-#pragma once
-#ifndef __DOT_HPP__
-#define __DOT_HPP__
-
-/// \file dot.hpp
-/// \brief Sparse dot product.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-/// dot_type result = x^H y
-
-namespace Tacho { 
-
-  using namespace std;
-
-  template<typename T> struct DotTraits {
-    typedef T dot_type;
-
-    static KOKKOS_FORCEINLINE_FUNCTION 
-    dot_type 
-    // dot(const T &x, const T &y) { return conj<T>(x)*y; }
-    dot(const T &x, const T &y) { return x*y; }
-  }; 
-
-  template<typename CrsRowViewType>
-  KOKKOS_INLINE_FUNCTION 
-  typename CrsRowViewType::value_type
-  dot(const CrsRowViewType x, const CrsRowViewType y) {
-    typedef typename CrsRowViewType::ordinal_type ordinal_type;
-    typedef typename CrsRowViewType::value_type   value_type;
-
-    typedef DotTraits<value_type> dot_traits;
-
-    value_type r_val(0);
-
-    const ordinal_type nnz_x = x.NumNonZeros();
-    const ordinal_type nnz_y = y.NumNonZeros();
-
-    for (ordinal_type jx=0, jy=0;jx<nnz_x && jy<nnz_y;) {
-      const ordinal_type diff = x.Col(jx) - y.Col(jy);
-      const ordinal_type sign = (0 < diff) - (diff < 0);
-      switch (sign) {
-      case  0:
-        r_val += dot_traits::dot(x.Value(jx++), y.Value(jy++));
-        break;
-      case -1: ++jx; break;
-      case  1: ++jy; break;
-      }
-    }
-    
-    return r_val;
-  }
-
-  template<typename CrsRowViewType>
-  KOKKOS_INLINE_FUNCTION 
-  typename CrsRowViewType::value_type
-  dot(const CrsRowViewType x) {
-    typedef typename CrsRowViewType::ordinal_type ordinal_type;
-    typedef typename CrsRowViewType::value_type   value_type;
-
-    typedef DotTraits<value_type> dot_traits;
-
-    value_type r_val(0);
-
-    const ordinal_type nnz = x.NumNonZeros();
-
-    for (ordinal_type j=0;j<nnz;++j) 
-      r_val += dot_traits::dot(x.Value(j), x.Value(j));
-    
-    return r_val;
-  }
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/gemm.hpp b/lib/kokkos/example/ichol/src/gemm.hpp
deleted file mode 100644
index 33c6058ec6fc6727dc62a320cab7bbb1855ea93f..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/gemm.hpp
+++ /dev/null
@@ -1,99 +0,0 @@
-#pragma once
-#ifndef __GEMM_HPP__
-#define __GEMM_HPP__
-
-/// \file gemm.hpp
-/// \brief Sparse matrix-matrix multiplication on given sparse patterns.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-#include "control.hpp"
-#include "partition.hpp"
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<int ArgTransA, int ArgTransB, int ArgAlgo,
-           int ArgVariant = Variant::One,
-           template<int,int> class ControlType = Control>
-  struct Gemm {
-
-    // data-parallel interface
-    // =======================
-    template<typename ScalarType,
-             typename ExecViewTypeA,
-             typename ExecViewTypeB,
-             typename ExecViewTypeC>
-    KOKKOS_INLINE_FUNCTION
-    static int invoke(typename ExecViewTypeA::policy_type &policy,
-                      const typename ExecViewTypeA::policy_type::member_type &member,
-                      const ScalarType alpha,
-                      typename ExecViewTypeA::matrix_type &A,
-                      typename ExecViewTypeB::matrix_type &B,
-                      const ScalarType beta,
-                      typename ExecViewTypeC::matrix_type &C);
-
-    // task-data parallel interface
-    // ============================
-    template<typename ScalarType,
-             typename ExecViewTypeA,
-             typename ExecViewTypeB,
-             typename ExecViewTypeC>
-    class TaskFunctor {
-    public:
-      typedef typename ExecViewTypeA::policy_type policy_type;
-      typedef typename policy_type::member_type member_type;
-      typedef int value_type;
-
-    private:
-      ScalarType _alpha, _beta;
-      typename ExecViewTypeA::matrix_type _A;
-      typename ExecViewTypeB::matrix_type _B;
-      typename ExecViewTypeC::matrix_type _C;
-
-      policy_type _policy;
-
-    public:
-      KOKKOS_INLINE_FUNCTION
-      TaskFunctor(const policy_type & P,
-                  const ScalarType alpha,
-                  const typename ExecViewTypeA::matrix_type & A,
-                  const typename ExecViewTypeB::matrix_type & B,
-                  const ScalarType beta,
-                  const typename ExecViewTypeC::matrix_type & C)
-        : _alpha(alpha),
-          _beta(beta),
-          _A(A),
-          _B(B),
-          _C(C),
-          _policy(P)
-      { }
-
-      string Label() const { return "Gemm"; }
-
-      // task execution
-      KOKKOS_INLINE_FUNCTION
-      void apply(value_type &r_val) {
-        r_val = Gemm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB,ExecViewTypeC>(_policy, _policy.member_single(),
-                             _alpha, _A, _B, _beta, _C);
-      }
-
-      // task-data execution
-      KOKKOS_INLINE_FUNCTION
-      void apply(const member_type &member, value_type &r_val) {
-        r_val = Gemm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB,ExecViewTypeC>(_policy, member,
-                             _alpha, _A, _B, _beta, _C);
-      }
-
-    };
-
-  };
-
-}
-
-
-// #include "gemm_nt_nt.hpp"
-#include "gemm_ct_nt.hpp"
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/gemm_ct_nt.hpp b/lib/kokkos/example/ichol/src/gemm_ct_nt.hpp
deleted file mode 100644
index 13d2518cab90896929ecb58645e61aeb51849394..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/gemm_ct_nt.hpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-#ifndef __GEMM_CT_NT_HPP__
-#define __GEMM_CT_NT_HPP__
-
-/// \file gemm_ct_nt.hpp
-/// \brief Sparse matrix-matrix multiplication on given sparse patterns.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "gemm_ct_nt_for_factor_blocked.hpp"
-// #include "gemm_ct_nt_for_tri_solve_blocked.hpp"
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp b/lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp
deleted file mode 100644
index 88a4658482a1504ab6ad6334d65bd34a7dea055f..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#pragma once
-#ifndef __GEMM_CT_NT_FOR_FACTOR_BLOCKED_HPP__
-#define __GEMM_CT_NT_FOR_FACTOR_BLOCKED_HPP__
-
-/// \file gemm_ct_nt_for_factor_blocked.hpp
-/// \brief Sparse matrix-matrix multiplication on given sparse patterns.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho {
-
-  using namespace std;
-
-  // Gemm used in the factorization phase
-  // ====================================
-  template<>
-  template<typename ScalarType,
-           typename CrsExecViewTypeA,
-           typename CrsExecViewTypeB,
-           typename CrsExecViewTypeC>
-  KOKKOS_INLINE_FUNCTION
-  int
-  Gemm<Trans::ConjTranspose,Trans::NoTranspose,
-       AlgoGemm::ForFactorBlocked>
-  ::invoke(typename CrsExecViewTypeA::policy_type &policy,
-           const typename CrsExecViewTypeA::policy_type::member_type &member,
-           const ScalarType alpha,
-           typename CrsExecViewTypeA::matrix_type &A,
-           typename CrsExecViewTypeB::matrix_type &B,
-           const ScalarType beta,
-           typename CrsExecViewTypeC::matrix_type &C) {
-    typedef typename CrsExecViewTypeA::ordinal_type      ordinal_type;
-    typedef typename CrsExecViewTypeA::value_type        value_type;
-    typedef typename CrsExecViewTypeA::row_view_type     row_view_type;
-
-
-if ( false && member.team_rank() == 0 ) {
- printf("Gemm [%d +%d)x[%d +%d)\n"
-       , C.OffsetRows()
-       , C.NumRows()
-       , C.OffsetCols()
-       , C.NumCols()
-       );
-}
-
-    // scale the matrix C with beta
-    scaleCrsMatrix<ScalarType,CrsExecViewTypeC>(member, beta, C);
-
-    // Sparse matrix-matrix multiply:
-    // C(i,j) += alpha*A'(i,k)*B(k,j)
-
-    const ordinal_type mA = A.NumRows();
-    for (ordinal_type k=0;k<mA;++k) {
-      row_view_type &a = A.RowView(k);
-      const ordinal_type nnz_a = a.NumNonZeros();
-
-      row_view_type &b = B.RowView(k);
-      const ordinal_type nnz_b = b.NumNonZeros();
-
-      if (nnz_a > 0 && nnz_b > 0 ) {
-#if 0
-        Kokkos::parallel_for(
-          Kokkos::TeamThreadRange(member, 0, nnz_a),
-          [&](const ordinal_type i) {
-             const ordinal_type row_at_i  = a.Col(i);
-             const value_type   val_at_ik = a.Value(i);
-             // const value_type   val_at_ik = conj(a.Value(i));
-
-             row_view_type &c = C.RowView(row_at_i);
-
-             ordinal_type idx = 0;
-             for (ordinal_type j=0;j<nnz_b && (idx > -2);++j) {
-                const ordinal_type col_at_j  = b.Col(j);
-                const value_type   val_at_kj = b.Value(j);
-
-                idx = c.Index(col_at_j, idx);
-                if (idx >= 0)
-                  c.Value(idx) += alpha*val_at_ik*val_at_kj;
-                }
-          });
-#else
-        Kokkos::parallel_for(
-          Kokkos::TeamThreadRange(member, 0, nnz_a * nnz_b ),
-          [&](const ordinal_type ii) {
-             const ordinal_type i = ii / nnz_a ;
-             const ordinal_type j = ii % nnz_a ;
-
-             row_view_type &c = C.RowView( a.Col(i) );
-
-             // Binary search for c's index of b.Col(j)
-             const ordinal_type idx = c.Index( b.Col(j) );
-
-             if (idx >= 0) {
-               // const value_type   val_at_ik = conj(a.Value(i));
-               c.Value(idx) += alpha * a.Value(i) * b.Value(j);
-             }
-          });
-#endif
-
-        member.team_barrier();
-      }
-    }
-
-    return 0;
-  }
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/graph_helper_scotch.hpp b/lib/kokkos/example/ichol/src/graph_helper_scotch.hpp
deleted file mode 100644
index d2dd004579a507439b457a12a6f0de909bf33acd..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/graph_helper_scotch.hpp
+++ /dev/null
@@ -1,427 +0,0 @@
-#pragma once
-#ifndef __GRAPH_HELPER_SCOTCH_HPP__
-#define __GRAPH_HELPER_SCOTCH_HPP__
-
-/// \file graph_helper_scotch.hpp
-/// \brief Interface to scotch reordering
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "scotch.h"
-#include "util.hpp"
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<class CrsMatBaseType>
-  class GraphHelper_Scotch : public Disp {
-  public:
-    typedef typename CrsMatBaseType::ordinal_type ordinal_type;
-    typedef typename CrsMatBaseType::size_type    size_type;
-
-    typedef typename CrsMatBaseType::ordinal_type_array ordinal_type_array;
-    typedef typename CrsMatBaseType::size_type_array    size_type_array;
-
-  private:
-    string _label;
-
-    // scotch main data structure
-    SCOTCH_Graph _graph;
-    SCOTCH_Num _strat;
-    int _level;
-
-    // scotch input has no diagonal contribution
-    ordinal_type _base,_m;
-    ordinal_type_array _cidx;
-
-    size_type _nnz;
-    size_type_array _rptr;
-
-    // scotch output
-    ordinal_type _cblk;
-    ordinal_type_array _perm,_peri,_range,_tree;
-
-    // status flag
-    bool _is_ordered;
-
-  public:
-
-    void setLabel(string label) { _label = label; }
-    string Label() const { return _label; }
-
-    size_type NumNonZeros() const { return _nnz; }
-    ordinal_type NumRows() const { return _m; }
-
-    size_type_array RowPtrVector() const { return _rptr; }
-    ordinal_type_array ColIndexVector() const { return _cidx; }
-
-    ordinal_type_array PermVector()    const { return _perm; }
-    ordinal_type_array InvPermVector() const { return _peri; }
-
-    ordinal_type_array RangeVector()   const { return _range; }
-    ordinal_type_array TreeVector()    const { return _tree; }
-
-    ordinal_type NumBlocks() const { return _cblk; }
-
-    GraphHelper_Scotch() = default;
-
-    // convert graph first
-    GraphHelper_Scotch(const string label,
-                       const ordinal_type m,
-                       const size_type_array rptr,
-                       const ordinal_type_array cidx,
-                       const int seed = GraphHelper::DefaultRandomSeed) {
-
-      _label = "GraphHelper_Scotch::" + label;
-
-      _is_ordered = false;
-      _cblk  = 0;
-
-      // scotch does not allow self-contribution (diagonal term in sparse matrix)
-      _base  = 0; //A.BaseVal();
-      _m     = m; // A.NumRows();
-      _nnz   = rptr[m]; //A.NumNonZeros();
-
-      _rptr  = rptr; //size_type_array(_label+"::RowPtrArray", _m+1);
-      _cidx  = cidx; //ordinal_type_array(_label+"::ColIndexArray", _nnz);
-
-      _perm  = ordinal_type_array(_label+"::PermutationArray", _m);
-      _peri  = ordinal_type_array(_label+"::InvPermutationArray", _m);
-      _range = ordinal_type_array(_label+"::RangeArray", _m);
-      _tree  = ordinal_type_array(_label+"::TreeArray", _m);
-
-      // create a graph structure without diagonals
-      _strat = 0;
-      _level = 0;
-
-      //A.convertGraph(_nnz, _rptr, _cidx);
-
-      int ierr = 0;
-      ordinal_type *rptr_ptr = reinterpret_cast<ordinal_type*>(_rptr.ptr_on_device());
-      ordinal_type *cidx_ptr = reinterpret_cast<ordinal_type*>(_cidx.ptr_on_device());
-
-      if (seed != GraphHelper::DefaultRandomSeed) {
-        SCOTCH_randomSeed(seed);
-        SCOTCH_randomReset();
-      }
-
-      ierr = SCOTCH_graphInit(&_graph);CHKERR(ierr);
-      ierr = SCOTCH_graphBuild(&_graph,             // scotch graph
-                               _base,               // base value
-                               _m,                  // # of vertices
-                               rptr_ptr,                // column index array pointer begin
-                               rptr_ptr+1,              // column index array pointer end
-                               NULL,                // weights on vertices (optional)
-                               NULL,                // label array on vertices (optional)
-                               _nnz,                // # of nonzeros
-                               cidx_ptr,                // column index array
-                               NULL);CHKERR(ierr);  // edge load array (optional)
-      ierr = SCOTCH_graphCheck(&_graph);CHKERR(ierr);
-    }
-    GraphHelper_Scotch(const GraphHelper_Scotch &b) = default;
-
-    virtual~GraphHelper_Scotch() {
-      SCOTCH_graphFree(&_graph);
-    }
-
-    void setStratGraph(const SCOTCH_Num strat = 0) {
-      _strat = strat;
-    }
-
-    void setTreeLevel(const int level = 0) {
-      _level = level;
-    }
-
-    int computeOrdering(const ordinal_type treecut = 0,
-                        const ordinal_type minblksize = 0) {
-      int ierr = 0;
-
-      // pointers for global graph ordering
-      ordinal_type *perm  = _perm.ptr_on_device();
-      ordinal_type *peri  = _peri.ptr_on_device();
-      ordinal_type *range = _range.ptr_on_device();
-      ordinal_type *tree  = _tree.ptr_on_device();
-
-      {
-        const int level = (_level ? _level : max(1, int(log2(_m)-treecut))); // level = log2(_nnz)+10;
-        SCOTCH_Strat stradat;
-        SCOTCH_Num straval = _strat;
-                              //(SCOTCH_STRATLEVELMAX));//   |
-                              //SCOTCH_STRATLEVELMIN   |
-                              //SCOTCH_STRATLEAFSIMPLE |
-                              //SCOTCH_STRATSEPASIMPLE);
-
-        ierr = SCOTCH_stratInit(&stradat);CHKERR(ierr);
-
-        // if both are zero, do not run strategy
-        if (_strat || _level) {
-          cout << "GraphHelper_Scotch:: User provide a strategy and/or level" << endl
-               << "                     strategy = " << _strat << ", level =  " << _level << endl;
-          ierr = SCOTCH_stratGraphOrderBuild (&stradat, straval, level, 0.2);CHKERR(ierr);
-        }
-        ierr = SCOTCH_graphOrder(&_graph,
-                                 &stradat,
-                                 perm,
-                                 peri,
-                                 &_cblk,
-                                 range,
-                                 tree);CHKERR(ierr);
-        SCOTCH_stratExit(&stradat);
-      }
-
-#if 0
-      {
-        // assume there are multiple roots
-        range[_cblk+1] = range[_cblk]; // dummy range
-        tree[_cblk] = -1;              // dummy root
-        for (ordinal_type i=0;i<_cblk;++i)
-          if (tree[i] == -1)           // multiple roots becomes children of the hummy root
-            tree[i] = (_cblk+1);
-        ++_cblk;                       // include the dummy root
-      }
-#endif
-
-      // provided blksize is greater than 0, reorder internally
-      // if (treecut > 0 && minblksize > 0) {
-      //   // graph array
-      //   ordinal_type *rptr_ptr = reinterpret_cast<ordinal_type*>(_rptr.ptr_on_device());
-      //   ordinal_type *cidx_ptr = reinterpret_cast<ordinal_type*>(_cidx.ptr_on_device());
-
-      //   // create workspace in
-      //   size_type_array    rptr_work = size_type_array(_label+"::Block::RowPtrArray", _m+1);
-      //   ordinal_type_array cidx_work = ordinal_type_array(_label+"::Block::ColIndexArray", _nnz);
-
-      //   // create workspace output
-      //   ordinal_type_array perm_work  = ordinal_type_array(_label+"::Block::PermutationArray", _m);
-      //   ordinal_type_array peri_work  = ordinal_type_array(_label+"::Block::InvPermutationArray", _m);
-      //   ordinal_type_array range_work = ordinal_type_array(_label+"::Block::RangeArray", _m);
-      //   ordinal_type_array tree_work  = ordinal_type_array(_label+"::Block::TreeArray", _m);
-
-      //   // scotch input
-      //   ordinal_type *rptr_blk = reinterpret_cast<ordinal_type*>(rptr_work.ptr_on_device());
-      //   ordinal_type *cidx_blk = reinterpret_cast<ordinal_type*>(cidx_work.ptr_on_device());
-
-      //   size_type nnz = 0;
-      //   rptr_blk[0] = nnz;
-
-      //   for (ordinal_type iblk=0;iblk<_cblk;++iblk) {
-      //     // allocate graph
-      //     SCOTCH_Graph graph;
-
-      //     ierr = SCOTCH_graphInit(&graph);CHKERR(ierr);
-
-      //     SCOTCH_Strat stradat;
-      //     SCOTCH_Num straval = (/*SCOTCH_STRATLEVELMAX   |
-      //                             SCOTCH_STRATLEVELMIN   |*/
-      //                           SCOTCH_STRATLEAFSIMPLE |
-      //                           SCOTCH_STRATSEPASIMPLE);
-
-      //     ierr = SCOTCH_stratInit(&stradat);CHKERR(ierr);
-      //     ierr = SCOTCH_stratGraphOrderBuild(&stradat, straval, 0, 0.2);CHKERR(ierr);
-
-      //     const ordinal_type ibegin = range[iblk], iend = range[iblk+1], m = iend - ibegin;
-
-      //     // scotch output
-      //     ordinal_type cblk_blk = 0;
-
-      //     ordinal_type *perm_blk  = perm_work.ptr_on_device()  + ibegin;
-      //     ordinal_type *peri_blk  = peri_work.ptr_on_device()  + ibegin;
-      //     ordinal_type *range_blk = range_work.ptr_on_device() + ibegin;
-      //     ordinal_type *tree_blk  = tree_work.ptr_on_device()  + ibegin;
-
-      //     // if each blk is greater than the given minblksize, reorder internally
-      //     if (m > minblksize) {
-      //       for (int i=ibegin;i<iend;++i) {
-      //         const ordinal_type ii = peri[i];
-      //         const ordinal_type jbegin = rptr_ptr[ii];
-      //         const ordinal_type jend = rptr_ptr[ii+1];
-
-      //         for (int j=jbegin;j<jend;++j) {
-      //           const ordinal_type jj = perm[cidx_ptr[j]];
-      //           if (ibegin <= jj && jj < iend)
-      //             cidx_blk[nnz++] = (jj - ibegin);
-      //         }
-      //         rptr_blk[i+1] = nnz;
-      //       }
-      //       const size_type nnz_blk = nnz - rptr_blk[ibegin];
-
-      //       ierr = SCOTCH_graphBuild(&graph,             // scotch graph
-      //                                0,                  // base value
-      //                                m,                  // # of vertices
-      //                                &rptr_blk[ibegin],  // column index array pointer begin
-      //                                &rptr_blk[ibegin]+1,// column index array pointer end
-      //                                NULL,               // weights on vertices (optional)
-      //                                NULL,               // label array on vertices (optional)
-      //                                nnz_blk,            // # of nonzeros
-      //                                cidx_blk,           // column index array
-      //                                NULL);CHKERR(ierr); // edge load array (optional)
-      //       ierr = SCOTCH_graphCheck(&graph);CHKERR(ierr);
-      //       ierr = SCOTCH_graphOrder(&graph,
-      //                                &stradat,
-      //                                perm_blk,
-      //                                peri_blk,
-      //                                &cblk_blk,
-      //                                range_blk,
-      //                                tree_blk);CHKERR(ierr);
-      //     } else {
-      //       for (ordinal_type i=0;i<m;++i) {
-      //         perm_blk[i] = i;
-      //         peri_blk[i] = i;
-      //       }
-      //       range_blk[1] = m;
-      //       tree_blk[0] = -1;
-      //     }
-
-      //     SCOTCH_stratExit(&stradat);
-      //     SCOTCH_graphFree(&graph);
-
-      //     for (ordinal_type i=0;i<m;++i) {
-      //       const ordinal_type ii = peri_blk[i] + ibegin;
-      //       peri_blk[i] = peri[ii];
-      //     }
-      //     for (ordinal_type i=0;i<m;++i) {
-      //       const ordinal_type ii = i + ibegin;
-      //       peri[ii] = peri_blk[i];
-      //     }
-
-      //   }
-
-      //   for (ordinal_type i=0;i<_m;++i)
-      //     perm[peri[i]] = i;
-      // }
-
-      _is_ordered = true;
-
-      //cout << "SCOTCH level = " << level << endl;
-      //cout << "Range   Tree " << endl;
-      //for (int i=0;i<_cblk;++i)
-      //  cout << _range[i] << " :: " << i << " " << _tree[i] << endl;
-
-      return 0;
-    }
-
-    int pruneTree(const ordinal_type cut) {
-      if (cut <=0 ) return 0;
-
-      ordinal_type_array work = ordinal_type_array(_label+"::WorkArray", _cblk+1);
-      for (ordinal_type iter=0;iter<cut && _cblk > 1;++iter) {
-        // horizontal merging
-        {
-          ordinal_type cnt = 0;
-          ordinal_type parent = _tree[0];
-          work[0] = cnt;
-          for (ordinal_type i=1;i<_cblk;++i) {
-            const ordinal_type myparent = _tree[i];
-            if (myparent == parent) {
-              work[i] = cnt;
-            } else {
-              parent = _tree[i];
-              work[i] = ++cnt;
-            }
-          }
-          work[_cblk] = ++cnt;
-
-          ordinal_type prev = -2;
-          const ordinal_type root = _cblk - 1;
-          for (ordinal_type i=0;i<root;++i) {
-            const ordinal_type myparent = _tree[i];
-            const ordinal_type me = work[i];
-
-            _tree[me] = work[myparent];
-            if (prev != me) {
-              _range[me] = _range[i];
-              prev = me;
-            }
-          }
-          {
-            const ordinal_type me = work[root];
-            _tree[me] = -1;
-            _range[me] = _range[root];
-
-            _range[work[root+1]] = _range[root+1];
-            _cblk = cnt;
-          }
-        }
-
-        // vertical merging
-        if (_cblk == 2) {
-          _tree[0] = -1;
-          _range[0] = 0;
-          _range[1] = _range[2];
-          _cblk = 1;
-        } else {
-          ordinal_type cnt = 0;
-          for (ordinal_type i=0;i<_cblk;++i) {
-            const ordinal_type diff = _tree[i+1] - _tree[i];
-            work[i] = (diff == 1 ? cnt : cnt++);
-          }
-          work[_cblk] = cnt;
-
-          ordinal_type prev = -2;
-          const ordinal_type root = _cblk - 1;
-          for (ordinal_type i=0;i<root;++i) {
-            const ordinal_type myparent = _tree[i];
-            const ordinal_type me = work[i];
-
-            _tree[me] = work[myparent];
-            if (prev != me) {
-              _range[me] = _range[i];
-              prev = me;
-            }
-          }
-          {
-            const ordinal_type me = work[root];
-            _tree[me] = -1;
-            _range[me] = _range[root];
-
-            _range[work[root+1]] = _range[root+1];
-            _cblk = cnt;
-          }
-        }
-      }
-
-      // cleaning
-      {
-        for (ordinal_type i=(_cblk+1);i<_m;++i) {
-          _tree[i] = 0;
-          _range[i] = 0;
-        }
-        _tree[_cblk] = 0;
-      }
-
-      return 0;
-    }
-
-    ostream& showMe(ostream &os) const {
-      streamsize prec = os.precision();
-      os.precision(15);
-      os << scientific;
-
-      os << " -- Scotch input -- " << endl
-         << "    Base Value     = " << _base << endl
-         << "    # of Rows      = " << _m << endl
-         << "    # of NonZeros  = " << _nnz << endl;
-
-      if (_is_ordered)
-        os << " -- Ordering -- " << endl
-           << "    CBLK   = " << _cblk << endl
-           << "  PERM     PERI     RANG     TREE" << endl;
-
-      const int w = 6;
-      for (ordinal_type i=0;i<_m;++i)
-        os << setw(w) << _perm[i] << "   "
-           << setw(w) << _peri[i] << "   "
-           << setw(w) << _range[i] << "   "
-           << setw(w) << _tree[i] << endl;
-
-      os.unsetf(ios::scientific);
-      os.precision(prec);
-
-      return os;
-    }
-
-  };
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/herk.hpp b/lib/kokkos/example/ichol/src/herk.hpp
deleted file mode 100644
index 548c495c448604d2bffd7a5dd1d9745ce440fc9e..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/herk.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#pragma once
-#ifndef __HERK_HPP__
-#define __HERK_HPP__
-
-/// \file herk.hpp
-/// \brief Sparse hermitian rank one update on given sparse patterns.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-#include "control.hpp"
-#include "partition.hpp"
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<int ArgUplo, int ArgTrans, int ArgAlgo,
-           int ArgVariant = Variant::One,
-           template<int,int> class ControlType = Control>
-  struct Herk {
-
-    // data-parallel interface
-    // =======================
-    template<typename ScalarType,
-             typename ExecViewTypeA,
-             typename ExecViewTypeC>
-    KOKKOS_INLINE_FUNCTION
-    static int invoke(typename ExecViewTypeA::policy_type &policy,
-                      const typename ExecViewTypeA::policy_type::member_type &member,
-                      const ScalarType alpha,
-                      typename ExecViewTypeA::matrix_type &A,
-                      const ScalarType beta,
-                      typename ExecViewTypeC::matrix_type &C);
-
-    // task-data parallel interface
-    // ============================
-    template<typename ScalarType,
-             typename ExecViewTypeA,
-             typename ExecViewTypeC>
-    class TaskFunctor {
-    public:
-      typedef typename ExecViewTypeA::policy_type policy_type;
-      typedef typename policy_type::member_type member_type;
-      typedef int value_type;
-
-    private:
-      ScalarType _alpha, _beta;
-      typename ExecViewTypeA::matrix_type _A;
-      typename ExecViewTypeC::matrix_type _C;
-
-      policy_type _policy;
-
-    public:
-      KOKKOS_INLINE_FUNCTION
-      TaskFunctor(const policy_type & P,
-                  const ScalarType alpha,
-                  const typename ExecViewTypeA::matrix_type & A,
-                  const ScalarType beta,
-                  const typename ExecViewTypeC::matrix_type & C)
-        : _alpha(alpha),
-          _beta(beta),
-          _A(A),
-          _C(C),
-          _policy(P)
-      { }
-
-      string Label() const { return "Herk"; }
-
-      // task execution
-      KOKKOS_INLINE_FUNCTION
-      void apply(value_type &r_val) {
-        r_val = Herk::invoke<ScalarType,ExecViewTypeA,ExecViewTypeC>(_policy, _policy.member_single(), 
-                             _alpha, _A, _beta, _C);
-      }
-
-      // task-data execution
-      KOKKOS_INLINE_FUNCTION
-      void apply(const member_type &member, value_type &r_val) {
-        r_val = Herk::invoke<ScalarType,ExecViewTypeA,ExecViewTypeC>(_policy, member, 
-                             _alpha, _A, _beta, _C);
-      }
-
-    };
-
-  };
-
-}
-
-#include "herk_u_ct.hpp"
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/herk_u_ct.hpp b/lib/kokkos/example/ichol/src/herk_u_ct.hpp
deleted file mode 100644
index 6de4a2fa5628f0bdd77da6fdfc916ad112569fce..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/herk_u_ct.hpp
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-#ifndef __HERK_U_CT_HPP__
-#define __HERK_U_CT_HPP__
-
-/// \file herk_u_ct.hpp
-/// \brief Sparse hermitian rank one update on given sparse patterns.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "herk_u_ct_for_factor_blocked.hpp"
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp b/lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp
deleted file mode 100644
index 58bba2be3c9c5fba07a3a36a77545bca917778c3..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp
+++ /dev/null
@@ -1,103 +0,0 @@
-#pragma once
-#ifndef __HERK_U_CT_FOR_FACTOR_BLOCKED_HPP__
-#define __HERK_U_CT_FOR_FACTOR_BLOCKED_HPP__
-
-/// \file herk_u_ct_for_factor_blocked.hpp
-/// \brief Sparse hermitian rank one update on given sparse patterns.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho {
-
-  using namespace std;
-
-
-  // Herk used in the factorization phase
-  // ====================================
-  template<>
-  template<typename ScalarType,
-           typename CrsExecViewTypeA,
-           typename CrsExecViewTypeC>
-  KOKKOS_INLINE_FUNCTION
-  int
-  Herk<Uplo::Upper,Trans::ConjTranspose,
-       AlgoHerk::ForFactorBlocked>
-  ::invoke(typename CrsExecViewTypeA::policy_type &policy,
-           const typename CrsExecViewTypeA::policy_type::member_type &member,
-           const ScalarType alpha,
-           typename CrsExecViewTypeA::matrix_type &A,
-           const ScalarType beta,
-           typename CrsExecViewTypeC::matrix_type &C) {
-    typedef typename CrsExecViewTypeA::ordinal_type      ordinal_type;
-    typedef typename CrsExecViewTypeA::value_type        value_type;
-    typedef typename CrsExecViewTypeA::row_view_type     row_view_type;
-
-
-if ( false && member.team_rank() == 0 ) {
- printf("Herk [%d +%d)x[%d +%d)\n"
-       , C.OffsetRows()
-       , C.NumRows()
-       , C.OffsetCols()
-       , C.NumCols()
-       );
-}
-
-    // scale the matrix C with beta
-    scaleCrsMatrix<ScalarType,CrsExecViewTypeC>(member, beta, C);
-
-    // C(i,j) += alpha*A'(i,k)*A(k,j)
-    for (ordinal_type k=0;k<A.NumRows();++k) {
-      row_view_type &a = A.RowView(k);
-      const ordinal_type nnz = a.NumNonZeros();
-
-      if (nnz > 0) {
-
-#if 0
-
-        Kokkos::parallel_for(
-          Kokkos::TeamThreadRange(member, 0, nnz),
-            [&](const ordinal_type i) {
-              const ordinal_type row_at_i  = a.Col(i);
-               // const value_type   val_at_ik = conj(a.Value(i));
-               const value_type   val_at_ik = a.Value(i);
-
-               row_view_type &c = C.RowView(row_at_i);
-
-               ordinal_type idx = 0;
-               for (ordinal_type j=i;j<nnz && (idx > -2);++j) {
-                 const ordinal_type col_at_j  = a.Col(j);
-                 const value_type   val_at_kj = a.Value(j);
-
-                 idx = c.Index(col_at_j, idx);
-                 if (idx >= 0)
-                   c.Value(idx) += alpha*val_at_ik*val_at_kj;
-               }
-             });
-#else
-
-        Kokkos::parallel_for(
-          Kokkos::TeamThreadRange(member, 0, nnz*nnz),
-            [&](const ordinal_type ii) {
-               const ordinal_type i = ii / nnz ;
-               const ordinal_type j = ii % nnz ;
-
-               row_view_type &c = C.RowView( a.Col(i) );
-
-               const ordinal_type idx = c.Index( a.Col(j) );
-
-               if (idx >= 0) {
-                 c.Value(idx) += alpha* a.Value(i) * a.Value(j);
-               }
-             });
-
-#endif
-
-        member.team_barrier();
-      }
-    }
-
-    return 0;
-  }
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/norm.hpp b/lib/kokkos/example/ichol/src/norm.hpp
deleted file mode 100644
index be77ee0dcf2b27f6a7e50fb8eeacb45dc9d50e82..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/norm.hpp
+++ /dev/null
@@ -1,82 +0,0 @@
-#pragma once
-#ifndef __NORM_HPP__
-#define __NORM_HPP__
-
-/// \file norm.hpp
-/// \brief Compute norm of sparse or dense matrices.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<typename DenseExecViewType>
-  KOKKOS_INLINE_FUNCTION
-  auto
-  normOneDenseMatrix(DenseExecViewType &A) -> decltype(real(typename DenseExecViewType::value_type())) {
-    typedef typename DenseExecViewType::ordinal_type  ordinal_type;
-    typedef typename DenseExecViewType::value_type    value_type;
-    typedef decltype(real(value_type())) norm_type;
-
-    const ordinal_type mA = A.NumRows();
-    const ordinal_type nA = A.NumCols();
-
-    norm_type r_val = 0.0;
-
-    for (ordinal_type j=0;j<nA;++j) {
-      norm_type col_sum_at_j = 0.0;
-      for (ordinal_type i=0;i<mA;++i)
-        col_sum_at_j += abs(A.Value(i,j));
-      r_val = max(r_val, col_sum_at_j);
-    }
-    return r_val;
-  }
-
-  template<typename DenseExecViewType>
-  KOKKOS_INLINE_FUNCTION
-  auto
-  normInfDenseMatrix(DenseExecViewType &A) -> decltype(real(typename DenseExecViewType::value_type())) {
-    typedef typename DenseExecViewType::ordinal_type  ordinal_type;
-    typedef typename DenseExecViewType::value_type    value_type;
-    typedef decltype(real(value_type())) norm_type;
-
-    const ordinal_type mA = A.NumRows();
-    const ordinal_type nA = A.NumCols();
-
-    norm_type r_val = 0.0;
-
-    for (ordinal_type i=0;i<mA;++i) {
-      norm_type row_sum_at_i = 0.0;
-      for (ordinal_type j=0;j<nA;++j) 
-        row_sum_at_i += abs(A.Value(i,j));
-      r_val = max(r_val, row_sum_at_i);
-    }
-    return r_val;
-  }
-  
-  template<typename DenseExecViewType>
-  KOKKOS_INLINE_FUNCTION
-  auto
-  normFrobeniusDenseMatrix(DenseExecViewType &A) -> decltype(real(typename DenseExecViewType::value_type())) {
-    typedef typename DenseExecViewType::ordinal_type  ordinal_type;
-    typedef typename DenseExecViewType::value_type    value_type;
-    typedef decltype(real(value_type())) norm_type;
-
-    const ordinal_type mA = A.NumRows();
-    const ordinal_type nA = A.NumCols();
-
-    norm_type r_val = 0.0;
-
-    for (ordinal_type i=0;i<mA;++i) 
-      for (ordinal_type j=0;j<nA;++j) {
-        value_type val = A.Value(i,j);
-        // r_val += conj(val)*val;
-        r_val += val*val;
-      }
-    return sqrt(r_val);
-  }
-
-}
-
-#endif
-
diff --git a/lib/kokkos/example/ichol/src/partition.hpp b/lib/kokkos/example/ichol/src/partition.hpp
deleted file mode 100644
index a3e9f7095a6b82b62e6c27bc5f91db0e253b0451..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/partition.hpp
+++ /dev/null
@@ -1,381 +0,0 @@
-
-#ifndef __PARTITION_HPP__
-#define __PARTITION_HPP__
-
-/// \file partition.hpp
-/// \brief Matrix partitioning utilities.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho { 
-
-  using namespace std;
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Part_2x2(const MatView A, MatView &ATL, MatView &ATR, 
-           /**************/ MatView &ABL, MatView &ABR,
-           const typename MatView::ordinal_type bm, 
-           const typename MatView::ordinal_type bn,
-           const int quadrant) {
-    typename MatView::ordinal_type bmm, bnn;
-
-    switch (quadrant) {
-    case Partition::TopLeft:
-      bmm = min(bm, A.NumRows());
-      bnn = min(bn, A.NumCols());                
-      
-      ATL.setView(A.BaseObject(),
-                  A.OffsetRows(), bmm,
-                  A.OffsetCols(), bnn);
-      break;
-    case Partition::TopRight:
-    case Partition::BottomLeft:
-      Kokkos::abort("Tacho::Part_2x2 Not yet implemented");
-      break;
-    case Partition::BottomRight:
-      bmm = A.NumRows() - min(bm, A.NumRows());
-      bnn = A.NumCols() - min(bn, A.NumCols());                
-      
-      ATL.setView(A.BaseObject(),
-                  A.OffsetRows(), bmm,
-                  A.OffsetCols(), bnn);
-      break;
-    default:
-      Kokkos::abort("Tacho::Part_2x2 Invalid Input");
-      break;
-    }
-    
-    ATR.setView(A.BaseObject(),
-                A.OffsetRows(),                 ATL.NumRows(),
-                A.OffsetCols() + ATL.NumCols(), A.NumCols() - ATL.NumCols());
-    
-    ABL.setView(A.BaseObject(),
-                A.OffsetRows() + ATL.NumRows(), A.NumRows() - ATL.NumRows(),
-                A.OffsetCols(),                 ATL.NumCols());
-    
-    ABR.setView(A.BaseObject(),
-                A.OffsetRows() + ATL.NumRows(), A.NumRows() - ATL.NumRows(),
-                A.OffsetCols() + ATL.NumCols(), A.NumCols() - ATL.NumCols());
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Part_1x2(const MatView A, MatView &AL, MatView &AR, 
-           const typename MatView::ordinal_type bn,
-           const int side) {
-    typename MatView::ordinal_type bmm, bnn;
-
-    switch (side) {
-    case Partition::Left:
-      bmm = A.NumRows();
-      bnn = min(bn, A.NumCols());
-      
-      AL.setView(A.BaseObject(),
-                 A.OffsetRows(), bmm,
-                 A.OffsetCols(), bnn);
-      break;
-    case Partition::Right:
-      bmm = A.NumRows();
-      bnn = A.NumCols() - min(bn, A.NumCols());
-
-      AL.setView(A.BaseObject(),
-                 A.OffsetRows(), bmm,
-                 A.OffsetCols(), bnn);
-      break;
-    default:
-      Kokkos::abort("Tacho::Part_1x2 Invalid Input");
-      break;
-    }
-
-    AR.setView(A.BaseObject(),
-               A.OffsetRows(),                A.NumRows(),
-               A.OffsetCols() + AL.NumCols(), A.NumCols() - AL.NumCols());
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Part_2x1(const MatView A, MatView &AT, 
-           /*************/  MatView &AB, 
-           const typename MatView::ordinal_type bm,
-           const int side) {
-    typename MatView::ordinal_type bmm, bnn;
-    
-    switch (side) {
-    case Partition::Top:
-      bmm = min(bm, A.NumRows());
-      bnn = A.NumCols();
-      
-      AT.setView(A.BaseObject(),
-                 A.OffsetRows(), bmm,
-                 A.OffsetCols(), bnn);
-      break;
-    case Partition::Bottom:
-      bmm = A.NumRows() - min(bm, A.NumRows());
-      bnn = A.NumCols();
-
-      AT.setView(A.BaseObject(),
-                 A.OffsetRows(), bmm,
-                 A.OffsetCols(), bnn);
-      break;
-    default:
-      Kokkos::abort("Tacho::Part_2x1 Invalid Input");
-      break;
-    }
-    
-    AB.setView(A.BaseObject(),
-               A.OffsetRows() + AT.NumRows(), A.NumRows() - AT.NumRows(),
-               A.OffsetCols(),                A.NumCols());
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Part_2x2_to_3x3(const MatView ATL, const MatView ATR, MatView &A00, MatView &A01, MatView &A02,
-                  /***********************************/ MatView &A10, MatView &A11, MatView &A12,
-                  const MatView ABL, const MatView ABR, MatView &A20, MatView &A21, MatView &A22,
-                  const typename MatView::ordinal_type bm, 
-                  const typename MatView::ordinal_type bn,
-                  const int quadrant) {
-    switch (quadrant) {
-    case Partition::TopLeft:
-      Part_2x2(ATL, A00, A01,
-               /**/ A10, A11, 
-               bm, bn, Partition::BottomRight);
-
-      Part_2x1(ATR, A02, 
-               /**/ A12,
-               bm, Partition::Bottom);
-
-      Part_1x2(ABL, A20, A21,
-               bn, Partition::Right);
-
-      A22.setView(ABR.BaseObject(),
-                  ABR.OffsetRows(), ABR.NumRows(),
-                  ABR.OffsetCols(), ABR.NumCols());
-      break;
-    case Partition::TopRight:
-    case Partition::BottomLeft:
-      Kokkos::abort("Tacho::Part_???");
-      break;
-    case Partition::BottomRight:
-      A00.setView(ATL.BaseObject(),
-                  ATL.OffsetRows(), ATL.NumRows(),
-                  ATL.OffsetCols(), ATL.NumCols());
-
-      Part_1x2(ATR, A01, A02,
-               bn, Partition::Left);
-
-      Part_2x1(ABL, A10, 
-               /**/ A20,
-               bm, Partition::Top);
-
-      Part_2x2(ABR, A11, A12,
-               /**/ A21, A22, 
-               bm, bn, Partition::TopLeft);
-      break;
-    default:
-      Kokkos::abort("Tacho::Part_???");
-      break;
-    }
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Part_2x1_to_3x1(const MatView AT, MatView &A0, 
-                  /***************/ MatView &A1, 
-                  const MatView AB, MatView &A2, 
-                  const typename MatView::ordinal_type bm, 
-                  const int side) {
-    switch (side) {
-    case Partition::Top:
-      Part_2x1(AT,  A0, 
-               /**/ A1,
-               bm, Partition::Bottom);
-
-      A2.setView(AB.BaseObject(),
-                 AB.OffsetRows(), AB.NumRows(),
-                 AB.OffsetCols(), AB.NumCols());
-      break;
-    case Partition::Bottom:
-      A0.setView(AT.BaseObject(),
-                 AT.OffsetRows(), AT.NumRows(),
-                 AT.OffsetCols(), AT.NumCols());
-
-      Part_2x1(AB,  A1, 
-               /**/ A2,
-               bm, Partition::Top);
-      break;
-    default:
-      Kokkos::abort("Tacho::Part_???");
-      break;
-    }
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Part_1x2_to_1x3(const MatView AL, const MatView AR, 
-                  MatView &A0, MatView &A1, MatView &A2,
-                  const typename MatView::ordinal_type bn, 
-                  const int side) {
-    switch (side) {
-    case Partition::Left:
-      Part_1x2(AL,  A0, A1,
-               bn, Partition::Right);
-
-      A2.setView(AR.BaseObaject(),
-                 AR.OffsetRows(), AR.NumRows(),
-                 AR.OffsetCols(), AR.NumCols());
-      break;
-    case Partition::Right:
-      A0.setView(AL.BaseObject(),
-                 AL.OffsetRows(), AL.NumRows(),
-                 AL.OffsetCols(), AL.NumCols());
-
-      Part_1x2(AR,  A1, A2,
-               bn, Partition::Left);
-      break;
-    default:
-      Kokkos::abort("Tacho::Part_???");
-      break;
-    }
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Merge_2x2(const MatView ATL, const MatView ATR, 
-            const MatView ABL, const MatView ABR, MatView &A) {
-    A.setView(ATL.BaseObject(),
-              ATL.OffsetRows(), ATL.NumRows() + ABR.NumRows(), 
-              ATL.OffsetCols(), ATL.NumCols() + ABR.NumCols());
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Merge_1x2(const MatView AL, const MatView AR, MatView &A) {
-    A.setView(AL.BaseObject(),
-              AL.OffsetRows(), AL.NumRows(),
-              AL.OffsetCols(), AL.NumCols() + AR.NumCols());
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Merge_2x1(const MatView AT, 
-            const MatView AB, MatView &A) {
-    A.setView(AT.BaseObject(),
-              AT.OffsetRows(), AT.NumRows() + AB.NumRows(),
-              AT.OffsetCols(), AT.NumCols());
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Merge_3x3_to_2x2(const MatView A00, const MatView A01, const MatView A02, MatView &ATL, MatView &ATR, 
-                   const MatView A10, const MatView A11, const MatView A12,
-                   const MatView A20, const MatView A21, const MatView A22, MatView &ABL, MatView &ABR,
-                   const int quadrant) {
-    switch (quadrant) {
-    case Partition::TopLeft:
-      Merge_2x2(A00, A01, 
-                A10, A11, ATL);
-      
-      Merge_2x1(A02, 
-                A12, ATR);
-
-      Merge_1x2(A20, A21, ABL);
-      
-      ABR.setView(A22.BaseObject(),
-                  A22.OffsetRows(), A22.NumRows(),
-                  A22.OffsetCols(), A22.NumCols());
-      break;
-    case Partition::TopRight:
-    case Partition::BottomLeft:
-      Kokkos::abort("Tacho::Part_???");
-      break;
-    case Partition::BottomRight:
-      ATL.setView(A00.BaseObject(),
-                  A00.OffsetRows(), A00.NumRows(),
-                  A00.OffsetCols(), A00.NumCols());
-
-      Merge_1x2(A01, A02, ATR);
-
-      Merge_2x1(A10, 
-                A20, ABL);
-
-      Merge_2x2(A11, A12, 
-                A21, A22, ABR);
-      break;
-    default:
-      Kokkos::abort("Tacho::Part_???");
-      break;
-    }
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Merge_3x1_to_2x1(const MatView A0, MatView &AT, 
-                   const MatView A1, 
-                   const MatView A2, MatView &AB, 
-                   const int side) {
-    switch (side) {
-    case Partition::Top:
-      Merge_2x1(A0, 
-                A1, AT);
-
-      AB.setView(A2.BaseObject(),
-                 A2.OffsetRows(), A2.NumRows(),
-                 A2.OffsetCols(), A2.NumCols());
-      break;
-    case Partition::Bottom:
-      AT.setView(A0.BaseObject(),
-                 A0.OffsetRows(), A0.NumRows(),
-                 A0.OffsetCols(), A0.NumCols());
-
-      Merge_2x1(A1, 
-                A2, AB);
-      break;
-    default:
-      Kokkos::abort("Tacho::Part_???");
-      break;
-    }
-  }
-
-  template<typename MatView>
-  KOKKOS_INLINE_FUNCTION 
-  void 
-  Merge_1x3_to_1x2(const MatView A0, const MatView A1, const MatView A2, 
-                   MatView &AL, MatView &AR, 
-                   const int side) {
-    switch (side) {
-    case Partition::Left:
-      Merge_1x2(A0, A1, AL);
-
-      AR.setView(A2.BaseObject(),
-                 A2.OffsetRows(), A2.NumRows(),
-                 A2.OffsetCols(), A2.NumCols());
-      break;
-    case Partition::Right:
-      AL.setView(A0.BaseObject(),
-                 A0.OffsetRows(), A0.NumRows(),
-                 A0.OffsetCols(), A0.NumCols());
-
-      Merge_1x2(A1, A2, AR);
-      break;
-    default:
-      Kokkos::abort("Tacho::Part_???");
-      break;
-    }
-  }
-
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/scale.hpp b/lib/kokkos/example/ichol/src/scale.hpp
deleted file mode 100644
index 3152520966d88caeaede7d81c8a9bf826400d610..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/scale.hpp
+++ /dev/null
@@ -1,92 +0,0 @@
-#pragma once
-#ifndef __SCALE_HPP__
-#define __SCALE_HPP__
-
-/// \file scale.hpp
-/// \brief Scaling sparse matrix.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<typename T> struct ScaleTraits {
-    typedef T scale_type;
-    // assume built-in types have appropriate type conversion
-    static constexpr T one = 1 ;
-    static constexpr T zero = 0 ;
-  };
-
-
-  template<typename ScalarType,
-           typename CrsExecViewType>
-  KOKKOS_INLINE_FUNCTION
-  int
-  scaleCrsMatrix(const typename CrsExecViewType::policy_type::member_type &member,
-                 const ScalarType alpha,
-                 typename CrsExecViewType::matrix_type &A) {
-    typedef typename CrsExecViewType::ordinal_type  ordinal_type;
-    typedef typename CrsExecViewType::value_type    value_type;
-    typedef typename CrsExecViewType::row_view_type row_view_type;
-
-    if (alpha == ScaleTraits<value_type>::one) {
-      // do nothing
-    } else {
-      const ordinal_type mA = A.NumRows();
-      if (mA > 0) {
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, mA),
-                             [&](const ordinal_type i) {
-                               row_view_type &row = A.RowView(i);
-                               for (ordinal_type j=0;j<row.NumNonZeros();++j)
-                                 row.Value(j) *= alpha;
-                             });
-        member.team_barrier();
-      }
-    }
-
-    return 0;
-  }
-
-  template<typename ScalarType,
-           typename DenseExecViewType>
-  KOKKOS_INLINE_FUNCTION
-  int
-  scaleDenseMatrix(const typename DenseExecViewType::policy_type::member_type &member,
-                   const ScalarType alpha,
-                   DenseExecViewType &A) {
-    typedef typename DenseExecViewType::ordinal_type  ordinal_type;
-    typedef typename DenseExecViewType::value_type    value_type;
-
-    if (alpha == ScaleTraits<value_type>::one) {
-      // do nothing
-    } else {
-      if (A.BaseObject().ColStride() > A.BaseObject().RowStride()) {
-        const ordinal_type nA = A.NumCols();
-        if (nA > 0) {
-          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nA),
-                               [&](const ordinal_type j) {
-                                 for (ordinal_type i=0;i<A.NumRows();++i)
-                                   A.Value(i, j) *= alpha;
-                               });
-          member.team_barrier();
-        }
-      } else {
-        const ordinal_type mA = A.NumRows();
-        if (mA > 0) {
-          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, mA),
-                               [&](const ordinal_type i) {
-                                 for (ordinal_type j=0;j<A.NumCols();++j)
-                                   A.Value(i, j) *= alpha;
-                               });
-          member.team_barrier();
-        }
-      }
-    }
-
-    return 0;
-  }
-
-}
-
-#endif
-
diff --git a/lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp b/lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp
deleted file mode 100644
index f6c381a99817ca5254ef3563fe48941410870ad7..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp
+++ /dev/null
@@ -1,379 +0,0 @@
-#pragma once
-#ifndef __SYMBOLIC_FACTOR_HELPER_HPP__
-#define __SYMBOLIC_FACTOR_HELPER_HPP__
-
-/// \file symbolic_factor_helper.hpp
-/// \brief The class compute a nonzero pattern with a given level of fills
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<class CrsMatrixType>
-  class SymbolicFactorHelper : public Disp {
-  public:
-    typedef typename CrsMatrixType::ordinal_type ordinal_type;
-    typedef typename CrsMatrixType::size_type    size_type;
-
-    typedef typename Kokkos::HostSpace::execution_space  host_exec_space ;
-
-    typedef typename CrsMatrixType::ordinal_type_array ordinal_type_array;
-    typedef typename CrsMatrixType::size_type_array    size_type_array;
-    typedef typename CrsMatrixType::value_type_array   value_type_array;
-
-  private:
-    string _label;                   // name of this class
-
-    // matrix index base
-    CrsMatrixType _A;                // input matrix
-    ordinal_type _m, _n;             // matrix dimension
-
-    struct crs_graph {
-      size_type_array _ap;           // row ptr array
-      ordinal_type_array _aj;        // col index array
-      size_type _nnz;                // # of nonzeros
-    };
-    typedef struct crs_graph crs_graph_type;
-    crs_graph_type _in, _out;
-
-    typedef Kokkos::View<ordinal_type**, Kokkos::LayoutLeft, host_exec_space> league_specific_ordinal_type_array;
-    typedef typename league_specific_ordinal_type_array::value_type* league_specific_ordinal_type_array_ptr;
-
-    int _lsize;
-    league_specific_ordinal_type_array _queue, _visited, _distance;
-
-    void createInternalWorkSpace() {
-      _queue    = league_specific_ordinal_type_array(_label+"::QueueArray",    _m, _lsize);
-      _visited  = league_specific_ordinal_type_array(_label+"::VisitedArray",  _m, _lsize);
-      _distance = league_specific_ordinal_type_array(_label+"::DistanceArray", _m, _lsize);
-    }
-
-    void freeInternalWorkSpace() {
-      _queue    = league_specific_ordinal_type_array();
-      _visited  = league_specific_ordinal_type_array();
-      _distance = league_specific_ordinal_type_array();
-    }
-
-  public:
-
-    void setLabel(string label) { _label = label; }
-    string Label() const { return _label; }
-
-    SymbolicFactorHelper(const CrsMatrixType &A,
-                         const int lsize = (host_exec_space::thread_pool_size(0)/
-                                            host_exec_space::thread_pool_size(2)))  {
-
-      _label = "SymbolicFactorHelper::" ;
-
-      // matrix index base and the number of rows
-      _A = A;
-
-      _m = _A.NumRows();
-      _n = _A.NumCols();
-
-      // allocate memory for input crs matrix
-      _in._nnz = _A.NumNonZeros();
-      _in._ap  = size_type_array(_label+"::Input::RowPtrArray", _m+1);
-      _in._aj  = ordinal_type_array(_label+"::Input::ColIndexArray", _in._nnz);
-
-      // adjust graph structure; A is assumed to have a graph without its diagonal
-      A.convertGraph(_in._ap, _in._aj);
-      _in._nnz = _in._ap[_m];
-
-      // league size
-      _lsize = lsize;
-
-      // create workspace per league
-      createInternalWorkSpace();
-    }
-    virtual~SymbolicFactorHelper() {
-      freeInternalWorkSpace();
-    }
-
-    class Queue {
-    private:
-      league_specific_ordinal_type_array_ptr _q;
-      ordinal_type _begin, _end;
-
-    public:
-      Queue(league_specific_ordinal_type_array_ptr q)
-        : _q(q),_begin(0),_end(0) { }
-
-      ordinal_type size() const { return _end - _begin; }
-      bool empty() const { return !size(); }
-
-      void push(const ordinal_type val) { _q[_end++] = val; }
-      ordinal_type pop() { return _q[_begin++]; }
-      ordinal_type end() { return _end; }
-      void reset() { _begin = 0; _end = 0; }
-    };
-
-    class FunctorComputeNonZeroPatternInRow {
-    public:
-      typedef Kokkos::TeamPolicy<host_exec_space> policy_type;
-
-    private:
-      ordinal_type _level, _m;
-      crs_graph_type _graph;
-
-      league_specific_ordinal_type_array _queue;
-      league_specific_ordinal_type_array _visited;
-      league_specific_ordinal_type_array _distance;
-
-      size_type_array _ap;
-      ordinal_type_array _aj;
-
-      ordinal_type _phase;
-
-    public:
-      FunctorComputeNonZeroPatternInRow(const ordinal_type level,
-                                        const ordinal_type m,
-                                        const crs_graph_type &graph,
-                                        league_specific_ordinal_type_array &queue,
-                                        league_specific_ordinal_type_array &visited,
-                                        league_specific_ordinal_type_array &distance,
-                                        size_type_array &ap,
-                                        ordinal_type_array &aj)
-        : _level(level), _m(m), _graph(graph),
-          _queue(queue), _visited(visited), _distance(distance),
-          _ap(ap), _aj(aj), _phase(0)
-      { }
-
-      void setPhaseCountNumNonZeros() { _phase = 0; }
-      void setPhaseComputeColIndex()  { _phase = 1; }
-
-      inline
-      void operator()(const typename policy_type::member_type &member) const {
-        const int lrank = member.league_rank();
-        const int lsize = member.league_size();
-
-        league_specific_ordinal_type_array_ptr queue    = &_queue(0, lrank);
-        league_specific_ordinal_type_array_ptr distance = &_distance(0, lrank);
-        league_specific_ordinal_type_array_ptr visited  = &_visited(0, lrank);
-
-        for (ordinal_type i=0;i<_m;++i)
-          visited[i] = 0;
-
-        // shuffle rows to get better load balance;
-        // for instance, if ND is applied, more fills are generated in the last seperator.
-        for (ordinal_type i=lrank;i<_m;i+=lsize) {
-
-          size_type cnt = 0;
-
-          // account for the diagonal
-          switch (_phase) {
-          case 0:
-            cnt = 1;
-            break;
-          case 1:
-            cnt = _ap[i];
-            _aj[cnt++] = i;
-            break;
-          }
-
-          {
-            Queue q(queue); // fixed size queue
-
-            // initialize work space
-            q.push(i);
-            distance[i] = 0;
-
-            const ordinal_type id = (i+1);
-            visited[i] = id;
-
-            // breath first search for i
-            while (!q.empty()) {
-              const ordinal_type h = q.pop();
-              // loop over j adjancy
-              const ordinal_type jbegin = _graph._ap[h], jend = _graph._ap[h+1];
-              for (ordinal_type j=jbegin;j<jend;++j) {
-                const ordinal_type t = _graph._aj[j];
-                if (visited[t] != id) {
-                  visited[t] = id;
-
-                  if (t < i && (_level < 0 || distance[h] < _level)) {
-                    q.push(t);
-                    distance[t] = distance[h] + 1;
-                  }
-                  if (t > i) {
-                    switch (_phase) {
-                    case 0:
-                      ++cnt;
-                      break;
-                    case 1:
-                      _aj[cnt++] = t;
-                      break;
-                    }
-                  }
-                }
-              }
-            }
-
-            // clear work space
-            for (ordinal_type j=0;j<q.end();++j) {
-              const ordinal_type jj = queue[j];
-              distance[jj] = 0;
-            }
-            q.reset();
-          }
-          switch (_phase) {
-          case 0:
-            _ap[i+1] = cnt;
-            break;
-          case 1:
-            sort(_aj.data() + _ap[i] , _aj.data() + _ap[i+1]);
-            break;
-          }
-        }
-      }
-    };
-
-    class FunctorCountOffsetsInRow {
-    public:
-      typedef Kokkos::RangePolicy<host_exec_space> policy_type;
-      typedef size_type value_type;
-
-    private:
-      size_type_array _off_in_rows;
-
-    public:
-      FunctorCountOffsetsInRow(size_type_array &off_in_rows)
-        : _off_in_rows(off_in_rows)
-      { }
-
-      KOKKOS_INLINE_FUNCTION
-      void init(value_type &update) const {
-        update = 0;
-      }
-
-      KOKKOS_INLINE_FUNCTION
-      void operator()(const typename policy_type::member_type &i, value_type &update, const bool final) const {
-        update += _off_in_rows(i);
-        if (final)
-          _off_in_rows(i) = update;
-      }
-
-      KOKKOS_INLINE_FUNCTION
-      void join(volatile value_type &update,
-                volatile const value_type &input) const {
-        update += input;
-      }
-    };
-
-    int createNonZeroPattern(const ordinal_type level,
-                             const int uplo,
-                             CrsMatrixType &F) {
-      // all output array should be local and rcp in Kokkos::View manage memory (de)allocation
-      size_type_array ap = size_type_array(_label+"::Output::RowPtrArray", _m+1);
-
-      // later determined
-      ordinal_type_array aj;
-      value_type_array ax;
-      size_type nnz  = 0;
-
-      {
-        FunctorComputeNonZeroPatternInRow functor(level, _m, _in,
-                                                  _queue,
-                                                  _visited,
-                                                  _distance,
-                                                  ap,
-                                                  aj);
-
-        functor.setPhaseCountNumNonZeros();
-        Kokkos::parallel_for(typename FunctorComputeNonZeroPatternInRow::policy_type(_lsize, 1), functor);
-      }
-      {
-        FunctorCountOffsetsInRow functor(ap);
-        Kokkos::parallel_scan(typename FunctorCountOffsetsInRow::policy_type(0, _m+1), functor);
-      }
-
-      nnz  = ap[_m];
-      aj = ordinal_type_array(_label+"::Output::ColIndexArray", nnz);
-      ax = value_type_array(_label+"::Output::ValueArray", nnz);
-
-      {
-        FunctorComputeNonZeroPatternInRow functor(level, _m, _in,
-                                                  _queue,
-                                                  _visited,
-                                                  _distance,
-                                                  ap,
-                                                  aj);
-
-        functor.setPhaseComputeColIndex();
-        Kokkos::parallel_for(typename FunctorComputeNonZeroPatternInRow::policy_type(_lsize, 1), functor);
-      }
-
-      {
-        F = CrsMatrixType("dummy", _m, _n, nnz, ap, aj, ax);
-        F.add(_A);
-      }
-
-      // record the symbolic factors
-      _out._nnz = nnz;
-      _out._ap = ap;
-      _out._aj = aj;
-
-      return 0;
-    }
-
-    int createNonZeroPattern(const int uplo,
-                             CrsMatrixType &F) {
-      return createNonZeroPattern(-1, uplo, F);
-    }
-
-    ostream& showMe(ostream &os) const {
-      streamsize prec = os.precision();
-      os.precision(15);
-      os << scientific;
-
-      const int w = 6;
-
-      os << " -- Matrix Dimension -- " << endl
-         << "    # of Rows  = " << _m << endl
-         << "    # of Cols  = " << _n << endl;
-
-      os << endl;
-
-      os << " -- Input Graph Without Diagonals -- " << endl
-         << "    # of NonZeros  = " << _in._nnz << endl ;
-
-      os << " -- Input Graph :: RowPtr -- " << endl;
-      {
-        const ordinal_type n0 = _in._ap.dimension_0();
-        for (ordinal_type i=0;i<n0;++i)
-          os << setw(w) << i
-             << setw(w) << _in._ap[i]
-             << endl;
-      }
-
-      os << endl;
-
-      os << " -- Output Graph With Diagonals-- " << endl
-         << "    # of NonZeros  = " << _out._nnz << endl ;
-
-      os << " -- Output Graph :: RowPtr -- " << endl;
-      {
-        const ordinal_type n0 = _out._ap.dimension_0();
-        for (ordinal_type i=0;i<n0;++i)
-          os << setw(w) << i
-             << setw(w) << _out._ap[i]
-             << endl;
-      }
-
-      os.unsetf(ios::scientific);
-      os.precision(prec);
-
-      return os;
-    }
-
-  };
-
-}
-
-#endif
-
-
-
diff --git a/lib/kokkos/example/ichol/src/symbolic_task.hpp b/lib/kokkos/example/ichol/src/symbolic_task.hpp
deleted file mode 100644
index f6cdc28ab133d123803fff40d5906cfaa58371ea..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/symbolic_task.hpp
+++ /dev/null
@@ -1,118 +0,0 @@
-#pragma once
-#ifndef __SYMBOLIC_TASK_HPP__
-#define __SYMBOLIC_TASK_HPP__
-
-/// \file symbolic_task.hpp
-/// \brief Provides tasking interface with graphviz output.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho { 
-  
-  using namespace std;
-
-  /// \brief Graphviz color mapping for the generated tasks.
-  static map<string,string> g_graphviz_color = {
-    { "chol/scalar", "indianred2"},
-    { "chol/trsm",   "orange2"   },
-    { "chol/gemm",   "lightblue2"} };
-
-  class SymbolicTaskQueue;
-
-  class SymbolicTask {
-  private:
-    string _name;
-    set<SymbolicTask*> _dep_tasks;
-
-  public:
-    // at this moment, make the queue global
-    // but this should be local and work with 
-    // multiple queues with separate thread teams
-    typedef SymbolicTaskQueue queue;
-
-    SymbolicTask() 
-      : _name("no-name") 
-    { }
-    
-    SymbolicTask(const SymbolicTask &b) 
-      : _name(b._name)
-    { }
-    
-    SymbolicTask(const string name) 
-      : _name(name) 
-    { }
-
-    int addDependence(SymbolicTask *b) {
-      if (b != NULL) 
-        _dep_tasks.insert(b);
-      return 0;
-    }
-
-    int clearDependence() {
-      _dep_tasks.clear();
-      return 0;
-    }
-
-    ostream& showMe(ostream &os) const {
-      os << "    uid = " << this << " , name = " << _name << ", # of deps = " << _dep_tasks.size()  << endl;
-      if (_dep_tasks.size()) {
-        for (auto it=_dep_tasks.begin();it!=_dep_tasks.end();++it) 
-          os << "          " << (*it) << " , name = " << (*it)->_name << endl;
-      }
-      return os;
-    }    
-
-    ostream& graphviz(ostream &os) const {
-      os << (long)(this) 
-         << " [label=\"" << _name ;
-      auto it = g_graphviz_color.find(_name);
-      if (it != g_graphviz_color.end())
-        os << "\" ,style=filled,color=\"" << it->second << "\" "; 
-      os << "];";
-      for (auto it=_dep_tasks.begin();it!=_dep_tasks.end();++it) 
-        os << (long)(*it) << " -> " << (long)this << ";";
-      return (os << endl);
-    }
-
-  };
-
-  static vector<SymbolicTask*> g_queue;
-
-  class SymbolicTaskQueue {
-  public:
-    static SymbolicTask* push(SymbolicTask *task) {
-      g_queue.push_back(task);
-      return g_queue.back();
-    }
-
-    static int clear() {
-      for (auto it=g_queue.begin();it!=g_queue.end();++it)
-        delete (*it);
-      g_queue.clear();
-      return 0;
-    }
-
-    static ostream& showMe(ostream &os) {
-      if (g_queue.size()) {
-        os << " -- Symbolic Task Queue -- " << endl;
-        for (auto it=g_queue.begin();it!=g_queue.end();++it)
-          (*it)->showMe(os);
-      } else {
-        os << " -- Symbolic Task Queue is empty -- " << endl;
-      }
-      return os;
-    }
-
-    static ostream& graphviz(ostream &os, 
-                             const double width = 7.5,
-                             const double length = 10.0) {
-      os << "digraph TaskGraph {" << endl;
-      os << "size=\"" << width << "," << length << "\";" << endl;
-      for (auto it=g_queue.begin();it!=g_queue.end();++it) 
-        (*it)->graphviz(os);
-      os << "}" << endl;
-      return (os << endl);
-    }
-  };
-  
-}
-#endif
diff --git a/lib/kokkos/example/ichol/src/task_factory.hpp b/lib/kokkos/example/ichol/src/task_factory.hpp
deleted file mode 100644
index b829da6737dfa3423b800aa6021b2c33e94b2c78..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/task_factory.hpp
+++ /dev/null
@@ -1,77 +0,0 @@
-#pragma once
-#ifndef __TASK_FACTORY_HPP__
-#define __TASK_FACTORY_HPP__
-
-/// \file task_factory.hpp
-/// \brief A wrapper for task policy and future with a provided space type.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho { 
-
-  using namespace std;
-
-  /// \class TaskFactory
-  /// \brief Minimal interface to Kokkos tasking.
-  ///
-  /// TaskFactory is attached to blocks as a template argument in order to 
-  /// create and manage tasking future objects. Note that policy (shared 
-  /// pointer to the task generator) is not a member object in this class.
-  /// This class includes minimum interface for tasking with type decralation 
-  /// of the task policy and template alias of future so that future objects 
-  /// generated in this class will match to their policy and its execution space. 
-  ///
-  template<typename PolicyType,        
-           typename FutureType>
-  class TaskFactory {
-  private:
-    static constexpr int _max_task_dependence = 10 ;
-
-  public:
-    typedef PolicyType policy_type;
-    typedef FutureType future_type;
-    
-    template<typename TaskFunctorType>
-    static KOKKOS_INLINE_FUNCTION
-    future_type create(policy_type &policy, const TaskFunctorType &func) {
-
-      future_type f ;
-      // while ( f.is_null() ) {
-        f = policy.task_create_team(func, _max_task_dependence);
-      // }
-      if ( f.is_null() ) Kokkos::abort("task_create_team FAILED, out of memory");
-      return f ;
-    }
-    
-    static KOKKOS_INLINE_FUNCTION
-    void spawn(policy_type &policy, const future_type &obj, bool priority = false ) {
-      policy.spawn(obj,priority);
-    }
-    
-    static KOKKOS_INLINE_FUNCTION
-    void addDependence(policy_type &policy, 
-                       const future_type &after, const future_type &before) {
-      policy.add_dependence(after, before);
-    }
-
-    template<typename TaskFunctorType>
-    static  KOKKOS_INLINE_FUNCTION
-    void addDependence(policy_type &policy, 
-                       TaskFunctorType *after, const future_type &before) {
-      policy.add_dependence(after, before);
-    }
-
-    template<typename TaskFunctorType>
-    static  KOKKOS_INLINE_FUNCTION
-    void clearDependence(policy_type &policy, TaskFunctorType *func) {
-      policy.clear_dependence(func);
-    }
-
-    template<typename TaskFunctorType>
-    static KOKKOS_INLINE_FUNCTION
-    void respawn(policy_type &policy, TaskFunctorType *func) {
-      policy.respawn(func);
-    }
-  };
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/task_view.hpp b/lib/kokkos/example/ichol/src/task_view.hpp
deleted file mode 100644
index ce280a325fd6a460c687f15e0a69c4aa6dd0e8b5..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/task_view.hpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#pragma once
-#ifndef __TASK_VIEW_HPP__
-#define __TASK_VIEW_HPP__
-
-/// \file task_view.hpp
-/// \brief Task view is inherited from matrix view and have a member for the task handler.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-namespace Tacho { 
-
-  using namespace std;
-
-  template<typename MatrixViewType,
-           typename TaskFactoryType>
-  class TaskView : public MatrixViewType {
-  public:
-    typedef          MatrixViewType                matrix_type ;
-    typedef typename MatrixViewType::value_type    value_type;
-    typedef typename MatrixViewType::ordinal_type  ordinal_type;
-
-    typedef TaskFactoryType task_factory_type;
-    typedef typename task_factory_type::policy_type policy_type;
-    typedef typename task_factory_type::future_type future_type;
-
-  private:
-    future_type _f;
-
-  public:
-    KOKKOS_INLINE_FUNCTION
-    void setFuture(const future_type &f)
-      { _f = f; }
-
-    KOKKOS_INLINE_FUNCTION
-    future_type Future() const { return _f; }
-
-    KOKKOS_INLINE_FUNCTION
-    ~TaskView() = default ;
-
-    KOKKOS_INLINE_FUNCTION
-    TaskView() 
-      : MatrixViewType(), _f()
-    { } 
-
-    TaskView(const TaskView &b) = delete ;
-
-    KOKKOS_INLINE_FUNCTION
-    TaskView(typename MatrixViewType::mat_base_type const & b) 
-      : MatrixViewType(b), _f() 
-    { }
-
-    KOKKOS_INLINE_FUNCTION
-    TaskView(typename MatrixViewType::mat_base_type const & b,
-             const ordinal_type offm, const ordinal_type m,
-             const ordinal_type offn, const ordinal_type n) 
-      : MatrixViewType(b, offm, m, offn, n), _f() 
-    { }
-
-  };
-}
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#if ! KOKKOS_USING_EXP_VIEW
-
-namespace Kokkos {
-  namespace Impl {
-
-    //  The Kokkos::View allocation will by default assign each allocated datum to zero.
-    //  This is not the required initialization behavior when
-    //  non-trivial objects are used within a Kokkos::View.
-    //  Create a partial specialization of the Kokkos::Impl::AViewDefaultConstruct
-    //  to replace the assignment initialization with placement new initialization.
-    //
-    //  This work-around is necessary until a TBD design refactorization of Kokkos::View.
-
-    template< class ExecSpace , typename T1, typename T2 >
-    struct ViewDefaultConstruct< ExecSpace , Tacho::TaskView<T1,T2> , true >
-    {
-      typedef Tacho::TaskView<T1,T2> type ;
-      type * const m_ptr ;
-
-      KOKKOS_FORCEINLINE_FUNCTION
-      void operator()( const typename ExecSpace::size_type& i ) const
-      { new(m_ptr+i) type(); }
-
-      ViewDefaultConstruct( type * pointer , size_t capacity )
-        : m_ptr( pointer )
-      {
-        Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
-        parallel_for( range , *this );
-        ExecSpace::fence();
-      }
-    };
-
-  } // namespace Impl
-} // namespace Kokkos
-
-#endif /* #if ! KOKKOS_USING_EXP_VIEW */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/trsm.hpp b/lib/kokkos/example/ichol/src/trsm.hpp
deleted file mode 100644
index b4a6a7df48967257f824ae73680bf918d457be76..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/trsm.hpp
+++ /dev/null
@@ -1,92 +0,0 @@
-#pragma once
-#ifndef __TRSM_HPP__
-#define __TRSM_HPP__
-
-/// \file trsm.hpp
-/// \brief Sparse triangular solve on given sparse patterns and multiple rhs.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-
-#include "util.hpp"
-#include "control.hpp"
-#include "partition.hpp"
-
-namespace Tacho {
-
-  using namespace std;
-
-  template<int ArgSide,int ArgUplo, int ArgTrans, int ArgAlgo,
-           int ArgVariant = Variant::One,
-           template<int,int> class ControlType = Control>
-  struct Trsm {
-
-    // data-parallel interface
-    // =======================
-    template<typename ScalarType,
-             typename ExecViewTypeA,
-             typename ExecViewTypeB>
-    KOKKOS_INLINE_FUNCTION
-    static int invoke(typename ExecViewTypeA::policy_type &policy,
-                      const typename ExecViewTypeA::policy_type::member_type &member,
-                      const int diagA,
-                      const ScalarType alpha,
-                      typename ExecViewTypeA::matrix_type &A,
-                      typename ExecViewTypeB::matrix_type &B);
-
-    // task-data parallel interface
-    // ============================
-    template<typename ScalarType,
-             typename ExecViewTypeA,
-             typename ExecViewTypeB>
-    class TaskFunctor {
-    public:
-      typedef typename ExecViewTypeA::policy_type policy_type;
-      typedef typename policy_type::member_type member_type;
-      typedef int value_type;
-
-    private:
-      int _diagA;
-      ScalarType _alpha;
-      typename ExecViewTypeA::matrix_type _A;
-      typename ExecViewTypeB::matrix_type _B;
-
-      policy_type _policy;
-
-    public:
-      KOKKOS_INLINE_FUNCTION
-      TaskFunctor(const policy_type & P,
-                  const int diagA,
-                  const ScalarType alpha,
-                  const ExecViewTypeA & A,
-                  const ExecViewTypeB & B)
-        : _diagA(diagA),
-          _alpha(alpha),
-          _A(A),
-          _B(B),
-          _policy(P)
-      { }
-
-      string Label() const { return "Trsm"; }
-
-      // task execution
-      KOKKOS_INLINE_FUNCTION
-      void apply(value_type &r_val) {
-        r_val = Trsm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB>(_policy, _policy.member_single(),
-                             _diagA, _alpha, _A, _B);
-      }
-
-      // task-data execution
-      KOKKOS_INLINE_FUNCTION
-      void apply(const member_type &member, value_type &r_val) {
-        r_val = Trsm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB>(_policy, member, 
-                             _diagA, _alpha, _A, _B);
-      }
-
-    };
-  };
-
-}
-
-// #include "trsm_l_u_nt.hpp"
-#include "trsm_l_u_ct.hpp"
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp b/lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp
deleted file mode 100644
index b6f3289474518bd88e55db198e4d2ad8efa7e435..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#pragma once
-#ifndef __TRSM_L_U_CT_HPP__
-#define __TRSM_L_U_CT_HPP__
-
-/// \file trsm_l_u_ct.hpp
-/// \brief Sparse triangular solve on given sparse patterns and multiple rhs.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-///
-#include "gemm.hpp"
-
-#include "trsm_l_u_ct_for_factor_blocked.hpp"
-// #include "trsm_l_u_ct_for_tri_solve_blocked.hpp"
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp b/lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp
deleted file mode 100644
index 7414e5d80f07f895a8cd4e5182acb3fc9976be58..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp
+++ /dev/null
@@ -1,185 +0,0 @@
-#pragma once
-#ifndef __TRSM_L_U_CT_FOR_FACTOR_BLOCKED_HPP__
-#define __TRSM_L_U_CT_FOR_FACTOR_BLOCKED_HPP__
-
-/// \file trsm_l_u_ct_for_factor_blocked.hpp
-/// \brief Sparse triangular solve on given sparse patterns and multiple rhs.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-///
-
-namespace Tacho {
-
-  using namespace std;
-
-  // Trsm used in the factorization phase: data parallel on b1t
-  // ==========================================================
-  template<>
-  template<typename ScalarType,
-           typename CrsExecViewTypeA,
-           typename CrsExecViewTypeB>
-  KOKKOS_INLINE_FUNCTION
-  int
-  Trsm<Side::Left,Uplo::Upper,Trans::ConjTranspose,
-       AlgoTrsm::ForFactorBlocked,Variant::One>
-  ::invoke(typename CrsExecViewTypeA::policy_type &policy,
-           const typename CrsExecViewTypeA::policy_type::member_type &member,
-           const int diagA,
-           const ScalarType alpha,
-           typename CrsExecViewTypeA::matrix_type &A,
-           typename CrsExecViewTypeB::matrix_type &B) {
-    typedef typename CrsExecViewTypeA::ordinal_type      ordinal_type;
-    typedef typename CrsExecViewTypeA::value_type        value_type;
-    typedef typename CrsExecViewTypeA::row_view_type     row_view_type;
-
-
-if ( false && member.team_rank() == 0 ) {
- printf("Trsm [%d +%d)x[%d +%d)\n"
-       , B.OffsetRows()
-       , B.NumRows()
-       , B.OffsetCols()
-       , B.NumCols()
-       );
-}
-
-    // scale the matrix B with alpha
-    scaleCrsMatrix<ScalarType,CrsExecViewTypeB>(member, alpha, B);
-
-    // Solve a system: AX = B -> B := inv(A) B
-    const ordinal_type mA = A.NumRows();
-    const ordinal_type nB = B.NumCols();
-
-    if (nB > 0) {
-      for (ordinal_type k=0;k<mA;++k) {
-        row_view_type &a = A.RowView(k);
-        // const value_type cdiag = std::conj(a.Value(0)); // for complex<T>
-        const value_type cdiag = a.Value(0);
-
-        // invert
-        row_view_type &b1 = B.RowView(k);
-        const ordinal_type nnz_b1 = b1.NumNonZeros();
-
-        if (diagA != Diag::Unit && nnz_b1 > 0) {
-          // b1t = b1t / conj(diag)
-          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_b1),
-                               [&](const ordinal_type j) {
-                                 b1.Value(j) /= cdiag;
-                               });
-        }
-
-        // update
-        const ordinal_type nnz_a = a.NumNonZeros();
-        if (nnz_a > 0) {
-          // B2 = B2 - trans(conj(a12t)) b1t
-          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_b1),
-                               [&](const ordinal_type j) {
-                                 // grab b1
-                                 const ordinal_type col_at_j = b1.Col(j);
-                                 const value_type   val_at_j = b1.Value(j);
-
-                                 for (ordinal_type i=1;i<nnz_a;++i) {
-                                   // grab a12t
-                                   const ordinal_type row_at_i = a.Col(i);
-                                   // const value_type   val_at_i = conj(a.Value(i));
-                                   const value_type   val_at_i = a.Value(i);
-
-                                   // grab b2
-                                   row_view_type &b2 = B.RowView(row_at_i);
-
-                                   // check and update
-                                   ordinal_type idx = 0;
-                                   idx = b2.Index(col_at_j, idx);
-                                   if (idx >= 0)
-                                     b2.Value(idx) -= val_at_i*val_at_j;
-                                 }
-                               });
-        }
-        member.team_barrier();
-      }
-    }
-
-    return 0;
-  }
-
-  // Trsm used in the factorization phase: data parallel on a1t
-  // ==========================================================
-  template<>
-  template<typename ScalarType,
-           typename CrsExecViewTypeA,
-           typename CrsExecViewTypeB>
-  KOKKOS_INLINE_FUNCTION
-  int
-  Trsm<Side::Left,Uplo::Upper,Trans::ConjTranspose,
-       AlgoTrsm::ForFactorBlocked,Variant::Two>
-  ::invoke(typename CrsExecViewTypeA::policy_type &policy,
-           const typename CrsExecViewTypeA::policy_type::member_type &member,
-           const int diagA,
-           const ScalarType alpha,
-           typename CrsExecViewTypeA::matrix_type &A,
-           typename CrsExecViewTypeB::matrix_type &B) {
-    typedef typename CrsExecViewTypeA::ordinal_type      ordinal_type;
-    typedef typename CrsExecViewTypeA::value_type        value_type;
-    typedef typename CrsExecViewTypeA::row_view_type     row_view_type;
-
-    // scale the matrix B with alpha
-    scaleCrsMatrix<ScalarType,CrsExecViewTypeB>(member, alpha, B);
-
-    // Solve a system: AX = B -> B := inv(A) B
-    const ordinal_type mA = A.NumRows();
-    const ordinal_type nB = B.NumCols();
-
-    if (nB > 0) {
-      for (ordinal_type k=0;k<mA;++k) {
-        row_view_type &a = A.RowView(k);
-        // const value_type cdiag = conj(a.Value(0));
-        const value_type cdiag = a.Value(0);
-
-        // invert
-        row_view_type &b1 = B.RowView(k);
-        const ordinal_type nnz_b1 = b1.NumNonZeros();
-
-        if (diagA != Diag::Unit && nnz_b1 > 0) {
-          // b1t = b1t / conj(diag)
-          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_b1),
-                               [&](const ordinal_type j) {
-                                 b1.Value(j) /= cdiag;
-                               });
-          member.team_barrier();
-        }
-
-        // update
-        const ordinal_type nnz_a = a.NumNonZeros();
-        if (nnz_a > 0) {
-          // B2 = B2 - trans(conj(a12t)) b1t
-          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_a),
-                               [&](const ordinal_type i) {
-                                 // grab a12t
-                                 const ordinal_type row_at_i = a.Col(i);
-                                 // const value_type   val_at_i = conj(a.Value(i));
-                                 const value_type   val_at_i = a.Value(i);
-
-                                 // grab b2
-                                 row_view_type &b2 = B.RowView(row_at_i);
-
-                                 ordinal_type idx = 0;
-                                 for (ordinal_type j=0;j<nnz_b1 && (idx > -2);++j) {
-                                   // grab b1
-                                   const ordinal_type col_at_j = b1.Col(j);
-                                   const value_type   val_at_j = b1.Value(j);
-
-                                   // check and update
-                                   idx = b2.Index(col_at_j, idx);
-                                   if (idx >= 0)
-                                     b2.Value(idx) -= val_at_i*val_at_j;
-                                 }
-                               });
-          member.team_barrier();
-        }
-      }
-    }
-
-    return 0;
-  }
-
-}
-
-#endif
diff --git a/lib/kokkos/example/ichol/src/util.cpp b/lib/kokkos/example/ichol/src/util.cpp
deleted file mode 100644
index ef220c48c1b7d58af2289dde4f226a7a102d63ee..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/util.cpp
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-static int dummy = 1;
-
diff --git a/lib/kokkos/example/ichol/src/util.hpp b/lib/kokkos/example/ichol/src/util.hpp
deleted file mode 100644
index 020475bc52daad5c864d7caa8ed34d03157a0046..0000000000000000000000000000000000000000
--- a/lib/kokkos/example/ichol/src/util.hpp
+++ /dev/null
@@ -1,237 +0,0 @@
-#pragma once
-#ifndef __UTIL_HPP__
-#define __UTIL_HPP__
-
-#include <stdio.h>
-#include <string.h>
-
-#include <string>
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <vector>
-#include <set>
-#include <map>
-#include <algorithm>
-#include <memory>
-
-#include <cmath>
-#include <complex>
-
-#include <limits>
-
-/// \file util.hpp
-/// \brief Utility functions and constant integer class like an enum class.
-/// \author Kyungjoo Kim (kyukim@sandia.gov)
-///
-/// This provides utility functions for implementing mini-app for incomplete
-/// sparse matrix factorization with task-data parallelism e.g., parameter
-/// classes, error handling, ostream << overloading.
-///
-/// Note: The reference of the "static const int" members in the enum-like
-/// classes should not be used as function arguments but their values only.
-
-
-using namespace std;
-
-namespace Tacho {
-
-#undef CHKERR
-#define CHKERR(ierr)                                                    \
-  if (ierr != 0) { cout << endl << ">> Error in " << __FILE__ << ", " << __LINE__ << " : " << ierr << endl; }
-
-#define MSG_NOT_YET_IMPLEMENTED ">> Not yet implemented"
-#define MSG_INVALID_INPUT(what) ">> Invaid input argument: " #what
-#define MSG_INVALID_TEMPLATE_ARGS ">> Invaid template arguments"
-#define ERROR(msg)                                                      \
-  { cout << endl << ">> Error in " << __FILE__ << ", " << __LINE__ << endl << msg << endl; }
-
-  // control id
-#undef  Ctrl
-#define Ctrl(name,algo,variant) name<algo,variant>
-
-  // control leaf
-#undef CtrlComponent
-#define CtrlComponent(name,algo,variant,component,id)                  \
-  Ctrl(name,algo,variant)::component[id]
-
-  // control recursion
-#undef CtrlDetail
-#define CtrlDetail(name,algo,variant,component) \
-  CtrlComponent(name,algo,variant,component,0),CtrlComponent(name,algo,variant,component,1),name
-
-  /// \class GraphHelper
-  class GraphHelper {
-  public:
-    static const int DefaultRandomSeed = -1;
-  };
-
-
-  /// \class Partition
-  /// \brief Matrix partition parameters.
-  class Partition {
-  public:
-    static const int Top         = 101;
-    static const int Bottom      = 102;
-
-    static const int Left        = 201;
-    static const int Right       = 202;
-
-    static const int TopLeft     = 401;
-    static const int TopRight    = 402;
-    static const int BottomLeft  = 403;
-    static const int BottomRight = 404;
-  };
-
-  /// \class Uplo
-  /// \brief Matrix upper/lower parameters.
-  class Uplo {
-  public:
-    static const int Upper = 501;
-    static const int Lower = 502;
-  };
-
-  /// \class Side
-  /// \brief Matrix left/right parameters.
-  class Side {
-  public:
-    static const int Left  = 601;
-    static const int Right = 602;
-  };
-
-  /// \class Diag
-  /// \brief Matrix unit/non-unit diag parameters.
-  class Diag {
-  public:
-    static const int Unit    = 701;
-    static const int NonUnit = 702;
-  };
-
-  /// \class Trans
-  /// \brief Matrix upper/lower parameters.
-  class Trans {
-  public:
-    static const int Transpose     = 801;
-    static const int ConjTranspose = 802;
-    static const int NoTranspose   = 803;
-  };
-
-  /// \class Loop
-  /// \brief outer/innner parameters
-  class Loop {
-  public:
-    static const int Outer = 901;
-    static const int Inner = 902;
-    static const int Fused = 903;
-  };
-
-  class Variant {
-  public:
-    static const int One   = 1;
-    static const int Two   = 2;
-    static const int Three = 3;
-    static const int Four  = 4;
-  };
-
-  /// \class AlgoChol
-  /// \brief Algorithmic variants in sparse factorization and sparse BLAS operations.
-  class AlgoChol {
-  public:
-    // One side factorization on flat matrices
-    static const int Dummy                  = 1000;
-    static const int Unblocked              = 1001;
-    static const int UnblockedOpt           = 1002;
-    static const int Blocked                = 1101; // testing only
-
-    static const int RightLookByBlocks      = 1201; // backbone structure is right looking
-    static const int ByBlocks               = RightLookByBlocks;
-
-    static const int NestedDenseBlock       = 1211;
-    static const int NestedDenseByBlocks    = 1212;
-
-    static const int RightLookDenseByBlocks = 1221;
-    static const int DenseByBlocks          = RightLookDenseByBlocks;
-
-    static const int ExternalLapack         = 1231;
-    static const int ExternalPardiso        = 1232;
-  };
-
-  // aliasing name space
-  typedef AlgoChol AlgoTriSolve;
-
-  class AlgoBlasLeaf {
-  public:
-    // One side factorization on flat matrices
-    static const int ForFactorBlocked = 2001;
-
-    // B and C are dense matrices and used for solve phase
-    static const int ForTriSolveBlocked = 2011;
-
-    static const int ExternalBlas = 2021;
-  };
-
-  class AlgoGemm : public AlgoBlasLeaf {
-  public:
-    static const int DenseByBlocks = 2101;
-  };
-
-  class AlgoTrsm : public AlgoBlasLeaf {
-  public:
-    static const int DenseByBlocks = 2201;
-  };
-
-  class AlgoHerk : public AlgoBlasLeaf {
-  public:
-    static const int DenseByBlocks = 2301;
-  };
-
-  /// \brief Interface for overloaded stream operators.
-  template<typename T>
-  inline
-  ostream& operator<<(ostream &os, const unique_ptr<T> &p) {
-    return p->showMe(os);
-  }
-
-  /// \class Disp
-  /// \brief Interface for the stream operator.
-  class Disp {
-    friend ostream& operator<<(ostream &os, const Disp &disp);
-  public:
-    Disp() { }
-    virtual ostream& showMe(ostream &os) const {
-      return os;
-    }
-  };
-
-  /// \brief Implementation of the overloaded stream operator.
-  inline
-  ostream& operator<<(ostream &os, const Disp &disp) {
-    return disp.showMe(os);
-  }
-
-  template<typename T> struct NumericTraits {};
-
-  template<>
-  struct NumericTraits<float> {
-    typedef float real_type;
-    static real_type epsilon() { return numeric_limits<float>::epsilon(); }
-  };
-  template<>
-  struct NumericTraits<double> {
-    typedef double real_type;
-    static real_type epsilon() { return numeric_limits<double>::epsilon(); }
-  };
-  template<>
-  struct NumericTraits<complex<float> > {
-    typedef float real_type;
-    static real_type epsilon() { return numeric_limits<float>::epsilon(); }
-  };
-  template<>
-  struct NumericTraits<complex<double> > {
-    typedef double real_type;
-    static real_type epsilon() { return numeric_limits<double>::epsilon(); }
-  };
-
-}
-
-#endif
diff --git a/lib/kokkos/example/md_skeleton/Makefile b/lib/kokkos/example/md_skeleton/Makefile
index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644
--- a/lib/kokkos/example/md_skeleton/Makefile
+++ b/lib/kokkos/example/md_skeleton/Makefile
@@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
 default: build
 	echo "Start Build"
 
-# use installed Makefile.kokkos
-include $(KOKKOS_PATH)/Makefile.kokkos
-
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = $(NVCC_WRAPPER)
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS = 
-EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "Cuda,OpenMP"
-#KOKKOS_ARCH = "SNB,Kepler35"
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
+  EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
 else
-CXX = g++
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS =  
-EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "OpenMP"
-#KOKKOS_ARCH = "SNB"
+  CXX = g++
+  EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
 endif
 
+CXXFLAGS = -O3 -I$(SRC_DIR)
+LINK ?= $(CXX)
+LDFLAGS ?=
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 DEPFLAGS = -M
 
 LIB =
diff --git a/lib/kokkos/example/multi_fem/Makefile b/lib/kokkos/example/multi_fem/Makefile
index 72e1768fcb9b446f94400a3e783767923779f6bf..4b114b56255f152206adee8dbc8979ae9015050f 100644
--- a/lib/kokkos/example/multi_fem/Makefile
+++ b/lib/kokkos/example/multi_fem/Makefile
@@ -12,27 +12,23 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
 default: build
 	echo "Start Build"
 
-# use installed Makefile.kokkos
-include $(KOKKOS_PATH)/Makefile.kokkos
+CXXFLAGS = -O3 -I$(SRC_DIR)
+LDFLAGS ?=
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = $(NVCC_WRAPPER)
-CXXFLAGS = -I$(SRC_DIR) -I$(CUDA_PATH) -O3
-LINK = $(CXX)
-LINKFLAGS = -L$(CUDA_PATH)/lib64 -lcusparse
-EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "Cuda,OpenMP"
-#KOKKOS_ARCH = "SNB,Kepler35"
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
+  EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
+  CXXFLAGS += -I$(SRC_DIR) -I$(CUDA_PATH) -O3
+  LDFLAGS += -L$(CUDA_PATH)/lib64 -lcusparse
 else
-CXX = g++
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS =  
-EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "OpenMP"
-#KOKKOS_ARCH = "SNB"
+  CXX = g++
+  EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
 endif
 
+LINK ?= $(CXX)
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 DEPFLAGS = -M
 
 LIB =
diff --git a/lib/kokkos/example/query_device/Makefile b/lib/kokkos/example/query_device/Makefile
index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644
--- a/lib/kokkos/example/query_device/Makefile
+++ b/lib/kokkos/example/query_device/Makefile
@@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
 default: build
 	echo "Start Build"
 
-# use installed Makefile.kokkos
-include $(KOKKOS_PATH)/Makefile.kokkos
-
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = $(NVCC_WRAPPER)
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS = 
-EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "Cuda,OpenMP"
-#KOKKOS_ARCH = "SNB,Kepler35"
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
+  EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
 else
-CXX = g++
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS =  
-EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "OpenMP"
-#KOKKOS_ARCH = "SNB"
+  CXX = g++
+  EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
 endif
 
+CXXFLAGS = -O3 -I$(SRC_DIR)
+LINK ?= $(CXX)
+LDFLAGS ?=
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 DEPFLAGS = -M
 
 LIB =
diff --git a/lib/kokkos/example/sort_array/Makefile b/lib/kokkos/example/sort_array/Makefile
index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644
--- a/lib/kokkos/example/sort_array/Makefile
+++ b/lib/kokkos/example/sort_array/Makefile
@@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
 default: build
 	echo "Start Build"
 
-# use installed Makefile.kokkos
-include $(KOKKOS_PATH)/Makefile.kokkos
-
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = $(NVCC_WRAPPER)
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS = 
-EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "Cuda,OpenMP"
-#KOKKOS_ARCH = "SNB,Kepler35"
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
+  EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
 else
-CXX = g++
-CXXFLAGS = -I$(SRC_DIR) -O3
-LINK = $(CXX)
-LINKFLAGS =  
-EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
-#KOKKOS_DEVICES = "OpenMP"
-#KOKKOS_ARCH = "SNB"
+  CXX = g++
+  EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
 endif
 
+CXXFLAGS = -O3 -I$(SRC_DIR)
+LINK ?= $(CXX)
+LDFLAGS ?=
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
 DEPFLAGS = -M
 
 LIB =
diff --git a/lib/kokkos/example/sort_array/sort_array.hpp b/lib/kokkos/example/sort_array/sort_array.hpp
index d21f9989582c7be28e7c5c1c0f325330cc340e78..ae17cb7ac7dc44cbbb4287b3a47c7fd0021de85a 100644
--- a/lib/kokkos/example/sort_array/sort_array.hpp
+++ b/lib/kokkos/example/sort_array/sort_array.hpp
@@ -105,7 +105,7 @@ void sort_array( const size_t array_length /* length of spans of array to sort *
 #if defined( KOKKOS_HAVE_CUDA )
 
   typedef typename
-    Kokkos::Impl::if_c< Kokkos::Impl::is_same< Device , Kokkos::Cuda >::value
+    Kokkos::Impl::if_c< std::is_same< Device , Kokkos::Cuda >::value
                       , Kokkos::View<int*,Kokkos::Cuda::array_layout,Kokkos::CudaHostPinnedSpace>
                       , typename device_array_type::HostMirror
                       >::type  host_array_type ;
diff --git a/lib/kokkos/example/tutorial/01_hello_world/Makefile b/lib/kokkos/example/tutorial/01_hello_world/Makefile
index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..62ab22f17eb561b4ffcdd38a91115627b8460821 100644
--- a/lib/kokkos/example/tutorial/01_hello_world/Makefile
+++ b/lib/kokkos/example/tutorial/01_hello_world/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/01_hello_world/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 01_hello_world.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 01_hello_world.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile b/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile
index 95ee2c47feacf363f99052173a28596144a75734..52d5fb07c481bf7357a6acbfc7ff547f3621f180 100644
--- a/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile
+++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile
@@ -1,37 +1,42 @@
 KOKKOS_PATH = ../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/01_hello_world_lambda/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 01_hello_world_lambda.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
-KOKKOS_CUDA_OPTIONS = "enable_lambda"
+KOKKOS_CUDA_OPTIONS += "enable_lambda"
 else
 CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 01_hello_world_lambda.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -41,4 +46,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp
index b6c9cc5e4380d4ea8b825c9305f2e7cea6316a10..4b8b9db621106417ba2e73c1e00a7b0a4088552f 100644
--- a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp
+++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp
@@ -98,11 +98,14 @@ int main (int argc, char* argv[]) {
   //
   // You may notice that the printed numbers do not print out in
   // order.  Parallel for loops may execute in any order.
+  // We also need to guard the use of a lambda so that this example still
+  // compiles with a backend that doesn't support it (i.e. Cuda 6.5/7.0).
+#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
   Kokkos::parallel_for (15, KOKKOS_LAMBDA (const int i) {
       // printf works in a CUDA parallel kernel; std::ostream does not.
       printf ("Hello from i = %i\n", i);
     });
-
+#endif
   // You must call finalize() after you are done using Kokkos.
   Kokkos::finalize ();
 }
diff --git a/lib/kokkos/example/tutorial/02_simple_reduce/Makefile b/lib/kokkos/example/tutorial/02_simple_reduce/Makefile
index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..d102af5151c3eb65bd470665371a1a50dec339f8 100644
--- a/lib/kokkos/example/tutorial/02_simple_reduce/Makefile
+++ b/lib/kokkos/example/tutorial/02_simple_reduce/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/02_simple_reduce/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 02_simple_reduce.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 02_simple_reduce.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile
index 95ee2c47feacf363f99052173a28596144a75734..4545668b77ba2e36a3b6412dce2c4836e1db29cc 100644
--- a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile
+++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile
@@ -1,37 +1,42 @@
 KOKKOS_PATH = ../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/02_simple_reduce_lambda/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 02_simple_reduce_lambda.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
-KOKKOS_CUDA_OPTIONS = "enable_lambda"
+KOKKOS_CUDA_OPTIONS += "enable_lambda"
 else
 CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 02_simple_reduce_lambda.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -41,4 +46,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp
index a403633a8a898375f2f5c0d4015fc3930570ef0d..f44ddce309de519d7109fb7a4212f7533c73d57c 100644
--- a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp
+++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp
@@ -67,9 +67,13 @@ int main (int argc, char* argv[]) {
   int sum = 0;
   // The KOKKOS_LAMBDA macro replaces the capture-by-value clause [=].
   // It also handles any other syntax needed for CUDA.
+  // We also need to guard the use of a lambda so that this example still
+  // compiles with a backend that doesn't support it (i.e. Cuda 6.5/7.0).
+  #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
   Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int i, int& lsum) {
       lsum += i*i;
     }, sum);
+  #endif
   printf ("Sum of squares of integers from 0 to %i, "
           "computed in parallel, is %i\n", n - 1, sum);
 
@@ -81,6 +85,10 @@ int main (int argc, char* argv[]) {
   printf ("Sum of squares of integers from 0 to %i, "
           "computed sequentially, is %i\n", n - 1, seqSum);
   Kokkos::finalize ();
+#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
   return (sum == seqSum) ? 0 : -1;
+#else
+  return 0;
+#endif
 }
 
diff --git a/lib/kokkos/example/tutorial/03_simple_view/Makefile b/lib/kokkos/example/tutorial/03_simple_view/Makefile
index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..e716b765e7f1778d839f2dcd603d258d2287c8fe 100644
--- a/lib/kokkos/example/tutorial/03_simple_view/Makefile
+++ b/lib/kokkos/example/tutorial/03_simple_view/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/03_simple_view/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 03_simple_view.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 03_simple_view.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile b/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile
index 95ee2c47feacf363f99052173a28596144a75734..b93c14910e40ce57936d71cedc869e3dc79182aa 100644
--- a/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile
+++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile
@@ -1,37 +1,42 @@
 KOKKOS_PATH = ../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/03_simple_view_lambda/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 03_simple_view_lambda.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
-KOKKOS_CUDA_OPTIONS = "enable_lambda"
+KOKKOS_CUDA_OPTIONS += "enable_lambda"
 else
 CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 03_simple_view_lambda.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -41,4 +46,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp
index 974af747763bfba23a2f6d3dfeefe68fb9ec4e25..e9e7c2370b3d4f99c5d0998cb5520ce3cca2221b 100644
--- a/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp
+++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp
@@ -97,6 +97,9 @@ int main (int argc, char* argv[]) {
   // pointers, not like std::vector.  Passing them by value does a
   // shallow copy.  A deep copy never happens unless you explicitly
   // ask for one.
+  // We also need to guard the use of a lambda so that this example still
+  // compiles with a backend that doesn't support it (i.e. Cuda 6.5/7.0).
+  #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
   Kokkos::parallel_for (10, KOKKOS_LAMBDA (const int i) {
     // Acesss the View just like a Fortran array.  The layout depends
     // on the View's memory space, so don't rely on the View's
@@ -111,6 +114,7 @@ int main (int argc, char* argv[]) {
     lsum += a(i,0)*a(i,1)/(a(i,2)+0.1);
   }, sum);
   printf ("Result: %f\n", sum);
+  #endif
   Kokkos::finalize ();
 }
 
diff --git a/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile b/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile
index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..8dd7598f03664eb610e6bd4376697dc801b80609 100644
--- a/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile
+++ b/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/04_simple_memoryspaces/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 04_simple_memoryspaces.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 04_simple_memoryspaces.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/05_simple_atomics/Makefile b/lib/kokkos/example/tutorial/05_simple_atomics/Makefile
index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..d297d45576b73a6f622b4d7f9ed84b9dddf4c481 100644
--- a/lib/kokkos/example/tutorial/05_simple_atomics/Makefile
+++ b/lib/kokkos/example/tutorial/05_simple_atomics/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/05_simple_atomics/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 05_simple_atomics.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 05_simple_atomics.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..956a4d1798f2318a14ec2a3a758a6e0bca5a047d 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile
+++ b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/01_data_layouts/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 01_data_layouts.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 01_data_layouts.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..41697b0731e2934c94133ec0876fa8f963a299fd 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile
+++ b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/02_memory_traits/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 02_memory_traits.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 02_memory_traits.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..8d0697aa2115c79c5749c23195dd917543ea8928 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile
+++ b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/03_subviews/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 03_subviews.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 03_subviews.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..0a3acd984f2ac88e25e034c747790cb01853018f 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile
+++ b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/04_dualviews/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 04_dualviews.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 04_dualviews.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp
index 4905e4bf88485c70527d9080844940a61c60365c..26b55eae7886f146699dd527a69a97a974d2dc6d 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp
+++ b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp
@@ -76,7 +76,7 @@ struct localsum {
   // overrides Kokkos' default execution space.
   typedef ExecutionSpace execution_space;
 
-  typedef typename Kokkos::Impl::if_c<Kokkos::Impl::is_same<ExecutionSpace,Kokkos::DefaultExecutionSpace>::value ,
+  typedef typename Kokkos::Impl::if_c<std::is_same<ExecutionSpace,Kokkos::DefaultExecutionSpace>::value ,
      idx_type::memory_space, idx_type::host_mirror_space>::type memory_space;
 
   // Get the view types on the particular device for which the functor
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..615ee2887a800829d055bf03e126e411ae438669 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile
+++ b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 05_NVIDIA_UVM.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 05_NVIDIA_UVM.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp
index cf5326b687199ff8c5c14580b18a9e406279cd11..72fd444abfe72f77de348291b0b4480a370e2dc1 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp
+++ b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp
@@ -47,9 +47,13 @@
 #include <cstdio>
 #include <cstdlib>
 
-typedef Kokkos::View<double*> view_type;
-typedef Kokkos::View<int**> idx_type;
-
+#ifdef KOKKOS_HAVE_CUDA
+typedef Kokkos::View<double*, Kokkos::CudaUVMSpace> view_type;
+typedef Kokkos::View<int**, Kokkos::CudaUVMSpace> idx_type;
+#else
+typedef Kokkos::View<double*,Kokkos::HostSpace> view_type;
+typedef Kokkos::View<int**,Kokkos::HostSpace> idx_type;
+#endif
 
 template<class Device>
 struct localsum {
@@ -59,7 +63,7 @@ struct localsum {
   // Get the view types on the particular device the functor is instantiated for
   idx_type::const_type idx;
   view_type dest;
-  Kokkos::View<view_type::const_data_type, view_type::array_layout, view_type::execution_space, Kokkos::MemoryRandomAccess > src;
+  Kokkos::View<view_type::const_data_type, view_type::array_layout, view_type::device_type, Kokkos::MemoryRandomAccess > src;
 
   localsum(idx_type idx_, view_type dest_,
       view_type src_):idx(idx_),dest(dest_),src(src_) {
@@ -68,7 +72,7 @@ struct localsum {
   KOKKOS_INLINE_FUNCTION
   void operator() (int i) const {
     double tmp = 0.0;
-    for(int j = 0; j < idx.dimension_1(); j++) {
+    for(int j = 0; j < int(idx.dimension_1()); j++) {
       const double val = src(idx(i,j));
       tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val);
     }
@@ -88,9 +92,11 @@ int main(int narg, char* arg[]) {
 
   srand(134231);
 
+  Kokkos::fence();
+
   // When using UVM Cuda views can be accessed on the Host directly
   for(int i=0; i<size; i++) {
-    for(int j=0; j<idx.dimension_1(); j++)
+    for(int j=0; j<int(idx.dimension_1()); j++)
       idx(i,j) = (size + i + (rand()%500 - 250))%size;
   }
 
@@ -126,8 +132,8 @@ int main(int narg, char* arg[]) {
 
 
 
-  printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev);
-  printf("Host   Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host);
+  printf("Device Time with Sync: %e without Sync: %e \n",sec1_dev,sec2_dev);
+  printf("Host   Time with Sync: %e without Sync: %e \n",sec1_host,sec2_host);
 
   Kokkos::finalize();
 }
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..dfb7d6df641f13de25e8f84f7038d9a86a905094 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile
+++ b/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 06_AtomicViews.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 06_AtomicViews.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile
index 60a514f4d50ccf3e36fa2a8233de90c46f3bbe5d..432a90126d6fbb0bacafcef1679125eea93088e6 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile
+++ b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3 --default-stream per-thread 
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 07_Overlapping_DeepCopy.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 07_Overlapping_DeepCopy.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/Makefile
index 19053b61b037f6a21f1be0874b1c23cbbb02a234..bc4012f68cfa22fcf0c9ac074391f26bd7a149d8 100644
--- a/lib/kokkos/example/tutorial/Advanced_Views/Makefile
+++ b/lib/kokkos/example/tutorial/Advanced_Views/Makefile
@@ -1,84 +1,121 @@
-default:
+ifndef KOKKOS_PATH
+  MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+  KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH))../../..
+endif
+
+ifndef KOKKOS_SETTINGS
+  KOKKOS_SETTINGS = "KOKKOS_PATH=${KOKKOS_PATH}"
+  ifdef KOKKOS_ARCH
+    KOKKOS_SETTINGS += "KOKKOS_ARCH=${KOKKOS_ARCH}"
+  endif
+  ifdef KOKKOS_DEVICES
+    KOKKOS_SETTINGS += "KOKKOS_DEVICES=${KOKKOS_DEVICES}"
+  endif
+  ifdef KOKKOS_OPTIONS
+    KOKKOS_SETTINGS += "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}"
+  endif
+  ifdef KOKKOS_CUDA_OPTIONS
+    KOKKOS_SETTINGS += "KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPTIONS}"
+  endif
+endif
+
+build:
+	mkdir -p 01_data_layouts
 	cd ./01_data_layouts; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 02_memory_traits
 	cd ./02_memory_traits; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 03_subviews
 	cd ./03_subviews; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 04_dualviews
 	cd ./04_dualviews; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 05_NVIDIA_UVM
 	cd ./05_NVIDIA_UVM; \
-	make -j 4
-	cd ./06_AtomicViews; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS}
+	#mkdir -p 06_AtomicViews
+	#cd ./06_AtomicViews; \
+	#make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS}
+	#mkdir -p 07_Overlapping_DeepCopy
+	#cd ./07_Overlapping_DeepCopy; \
+	#make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS}
 
-openmp:
+build-insource:
 	cd ./01_data_layouts; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./02_memory_traits; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./03_subviews; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./04_dualviews; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./05_NVIDIA_UVM; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
-	cd ./06_AtomicViews; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
-
-pthreads:
+	make build -j 4 ${KOKKOS_SETTINGS}
+	#cd ./06_AtomicViews; \
+	#make build -j 4 ${KOKKOS_SETTINGS}
+	#cd ./07_Overlapping_DeepCopy; \
+	#make build -j 4 ${KOKKOS_SETTINGS}
+test:
 	cd ./01_data_layouts; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS}
 	cd ./02_memory_traits; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS}
 	cd ./03_subviews; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS}
 	cd ./04_dualviews; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS}
 	cd ./05_NVIDIA_UVM; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
-	cd ./06_AtomicViews; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS}
+	#cd ./06_AtomicViews; \
+	#make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS}
+	#cd ./07_Overlapping_DeepCopy; \
+	#make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS}
 
-serial:
+test-insource:
 	cd ./01_data_layouts; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./02_memory_traits; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./03_subviews; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./04_dualviews; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./05_NVIDIA_UVM; \
-	make -j 4 KOKKOS_DEVICES=Serial
-	cd ./06_AtomicViews; \
-	make -j 4 KOKKOS_DEVICES=Serial
-
-cuda:
+	make test -j 4 ${KOKKOS_SETTINGS}
+	#cd ./06_AtomicViews; \
+	#make test -j 4 ${KOKKOS_SETTINGS}
+	#cd ./07_Overlapping_DeepCopy; \
+	#make test -j 4 ${KOKKOS_SETTINGS}
+clean:
 	cd ./01_data_layouts; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS}
 	cd ./02_memory_traits; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS}
 	cd ./03_subviews; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS}
 	cd ./04_dualviews; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS}
 	cd ./05_NVIDIA_UVM; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
-	cd ./06_AtomicViews; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS}
+	#cd ./06_AtomicViews; \
+	#make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS}
+	#cd ./07_Overlapping_DeepCopy; \
+	#make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS}
 
-clean:
+clean-insource:
 	cd ./01_data_layouts; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./02_memory_traits; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./03_subviews; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./04_dualviews; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./05_NVIDIA_UVM; \
-	make clean
-	cd ./06_AtomicViews; \
-	make clean
-
+	make clean ${KOKKOS_SETTINGS}
+	#cd ./06_AtomicViews; \
+	#make clean ${KOKKOS_SETTINGS}
+	#cd ./07_Overlapping_DeepCopy; \
+	#make clean ${KOKKOS_SETTINGS}
diff --git a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..60f6f94cdf30b2753903e9242065f3e610ff9e17 100644
--- a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile
+++ b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Algorithms/01_random_numbers/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 01_random_numbers.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 01_random_numbers.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp
index 3e6175a75652d54af1f0ad3c3c818485ccc59b07..a5cf40cedc172f7cc6e94e01e3c1d7e4202a9bd8 100644
--- a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp
+++ b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp
@@ -71,11 +71,13 @@ typedef Kokkos::HostSpace::execution_space DefaultHostType;
 template<class GeneratorPool>
 struct generate_random {
 
-  // The GeneratorPool
-  GeneratorPool rand_pool;
 
   // Output View for the random numbers
   Kokkos::View<uint64_t*> vals;
+  
+  // The GeneratorPool
+  GeneratorPool rand_pool;
+  
   int samples;
 
   // Initialize all members
@@ -139,8 +141,8 @@ int main(int argc, char* args[]) {
   Kokkos::fence();
   double time_1024 = timer.seconds();
 
-  printf("#Time XorShift64*:   %lf %lf\n",time_64,1.0e-9*samples*size/time_64 );
-  printf("#Time XorShift1024*: %lf %lf\n",time_1024,1.0e-9*samples*size/time_1024 );
+  printf("#Time XorShift64*:   %e %e\n",time_64,1.0e-9*samples*size/time_64 );
+  printf("#Time XorShift1024*: %e %e\n",time_1024,1.0e-9*samples*size/time_1024 );
 
   Kokkos::deep_copy(vals.h_view,vals.d_view);
 
diff --git a/lib/kokkos/example/tutorial/Algorithms/Makefile b/lib/kokkos/example/tutorial/Algorithms/Makefile
index edc2a36024fc24a791a27064e4f36febfec81c1a..ad0b76f9d66f4e3f35f5f1dc329b976c2603353e 100644
--- a/lib/kokkos/example/tutorial/Algorithms/Makefile
+++ b/lib/kokkos/example/tutorial/Algorithms/Makefile
@@ -1,24 +1,43 @@
-default:
-	cd ./01_random_numbers; \
-	make -j 4
+ifndef KOKKOS_PATH
+  MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+  KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH))../../..
+endif
 
-openmp:
-	cd ./01_random_numbers; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+ifndef KOKKOS_SETTINGS
+  KOKKOS_SETTINGS = "KOKKOS_PATH=${KOKKOS_PATH}"
+  ifdef KOKKOS_ARCH
+    KOKKOS_SETTINGS += "KOKKOS_ARCH=${KOKKOS_ARCH}"
+  endif
+  ifdef KOKKOS_DEVICES
+    KOKKOS_SETTINGS += "KOKKOS_DEVICES=${KOKKOS_DEVICES}"
+  endif
+  ifdef KOKKOS_OPTIONS
+    KOKKOS_SETTINGS += "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}"
+  endif
+  ifdef KOKKOS_CUDA_OPTIONS
+    KOKKOS_SETTINGS += "KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPTIONS}"
+  endif
+endif
 
-pthreads:
+build:
+	mkdir -p 01_random_numbers
 	cd ./01_random_numbers; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS}
 
-serial:
+build-insource:
 	cd ./01_random_numbers; \
-	make -j 4 KOKKOS_DEVICES=Serial
-
-cuda:
+	make build -j 4 ${KOKKOS_SETTINGS}
+test:
 	cd ./01_random_numbers; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS}
 
+test-insource:
+	cd ./01_random_numbers; \
+	make test -j 4 ${KOKKOS_SETTINGS}
 clean:
 	cd ./01_random_numbers; \
-	make clean
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS}
 
+clean-insource:
+	cd ./01_random_numbers; \
+	make clean ${KOKKOS_SETTINGS}
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..8c50430c3000b06509efc0e90ea56981ee7ed655 100644
--- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 01_thread_teams.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 01_thread_teams.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile
index 965b72b4e9a7aac83f1a748d3f0c4fe611aafabb..b9b017bf1b26d0109260e59c1c0847089989c3d9 100644
--- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile
@@ -1,37 +1,42 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 01_thread_teams_lambda.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
-KOKKOS_CUDA_OPTIONS = "enable_lambda"
+KOKKOS_CUDA_OPTIONS += "enable_lambda"
 else
 CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 01_thread_teams_lambda.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -41,4 +46,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp
index 565dd22e82849fde2fe527f25179ae49346222f9..c0865cfa65336be08f717ebc989f1d994e1faba2 100644
--- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp
@@ -77,6 +77,9 @@ int main (int narg, char* args[]) {
   // region."  That is, every team member is active and will execute
   // the body of the lambda.
   int sum = 0;
+  // The lambda must be guarded so that it is only compiled when the
+  // backend supports lambda dispatch (not the case for e.g. Cuda 6.5/7.0).
+  #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
   parallel_reduce (policy, KOKKOS_LAMBDA (const team_member& thread, int& lsum) {
       lsum += 1;
       // TeamPolicy<>::member_type provides functions to query the
@@ -85,7 +88,7 @@ int main (int narg, char* args[]) {
       printf ("Hello World: %i %i // %i %i\n", thread.league_rank (),
               thread.team_rank (), thread.league_size (), thread.team_size ());
     }, sum);
-
+  #endif
   // The result will be 12*team_policy::team_size_max([=]{})
   printf ("Result %i\n",sum);
 
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..bae9351229856fec2da7a0e8943d06fb6ce68f1f 100644
--- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 02_nested_parallel_for.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 02_nested_parallel_for.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..a041b69b560feb9dc8cb459b56a542e9d3249830 100644
--- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 03_vectorization.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 03_vectorization.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile
index 12ad36b31e458d155aa6dc653ab8188a7773bd18..6418875c9e2a041f301793c99c048b4a868f8ae5 100644
--- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile
@@ -1,15 +1,17 @@
 KOKKOS_PATH = ../../../..
-SRC = $(wildcard *.cpp)
+KOKKOS_SRC_PATH = ${KOKKOS_PATH}
+SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/*.cpp)
+vpath %.cpp $(sort $(dir $(SRC)))
 
 default: build
 	echo "Start Build"
 
 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ../../../../config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS = 
-EXE = $(SRC:.cpp=.cuda)
+EXE = 04_team_scan.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
 else
@@ -17,20 +19,23 @@ CXX = g++
 CXXFLAGS = -O3
 LINK = ${CXX}
 LINKFLAGS =  
-EXE = $(SRC:.cpp=.host)
+EXE = 04_team_scan.host
 KOKKOS_DEVICES = "OpenMP"
 KOKKOS_ARCH = "SNB"
 endif
 
 DEPFLAGS = -M
 
-OBJ = $(SRC:.cpp=.o)
+OBJ = $(notdir $(SRC:.cpp=.o))
 LIB =
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
+test: $(EXE)
+	./$(EXE)
+
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
 
@@ -40,4 +45,4 @@ clean: kokkos-clean
 # Compilation rules
 
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp
index c12b11d04ddc99957ec4be93c3928b9c3558cb92..ebc8578f0bce9f728670d594a968b5d289eb68da 100644
--- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp
@@ -96,7 +96,10 @@ struct find_2_tuples {
       }
     dev.team_barrier();
   }
-  size_t team_shmem_size( int team_size ) const { return sizeof(int)*(chunk_size+2 + team_size * team_size ); }
+  size_t team_shmem_size( int team_size ) const { 
+    return Kokkos::View<int**,Kokkos::MemoryUnmanaged>::shmem_size(TEAM_SIZE,TEAM_SIZE) +
+           Kokkos::View<int*,Kokkos::MemoryUnmanaged>::shmem_size(chunk_size+1);
+  }
 };
 
 int main(int narg, char* args[]) {
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile
index 9d6fff7981806a6d28d7704f9d4a0e6c776c8ed0..44fdf90f8a837da174b96fcb9032b3e47920390f 100644
--- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile
@@ -1,72 +1,95 @@
-default:
+ifndef KOKKOS_PATH
+  MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+  KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH))../../..
+endif
+
+ifndef KOKKOS_SETTINGS
+  KOKKOS_SETTINGS = "KOKKOS_PATH=${KOKKOS_PATH}"
+  ifdef KOKKOS_ARCH
+    KOKKOS_SETTINGS += "KOKKOS_ARCH=${KOKKOS_ARCH}"
+  endif
+  ifdef KOKKOS_DEVICES
+    KOKKOS_SETTINGS += "KOKKOS_DEVICES=${KOKKOS_DEVICES}"
+  endif
+  ifdef KOKKOS_OPTIONS
+    KOKKOS_SETTINGS += "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}"
+  endif
+  ifdef KOKKOS_CUDA_OPTIONS
+    KOKKOS_SETTINGS += "KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPTIONS}"
+  endif
+endif
+
+build:
+	mkdir -p 01_thread_teams
 	cd ./01_thread_teams; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 01_thread_teams_lambda
 	cd ./01_thread_teams_lambda; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 02_nested_parallel_for
 	cd ./02_nested_parallel_for; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 03_vectorization
 	cd ./03_vectorization; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 04_team_scan
 	cd ./04_team_scan; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS}
 
-openmp:
+build-insource:
 	cd ./01_thread_teams; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./01_thread_teams_lambda; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./02_nested_parallel_for; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./03_vectorization; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./04_team_scan; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
-
-pthreads:
+	make build -j 4 ${KOKKOS_SETTINGS}
+test:
 	cd ./01_thread_teams; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS}
 	cd ./01_thread_teams_lambda; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS}
 	cd ./02_nested_parallel_for; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS}
 	cd ./03_vectorization; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS}
 	cd ./04_team_scan; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS}
 
-serial:
+test-insource:
 	cd ./01_thread_teams; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./01_thread_teams_lambda; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./02_nested_parallel_for; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./03_vectorization; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./04_team_scan; \
-	make -j 4 KOKKOS_DEVICES=Serial
-
-cuda:
+	make test -j 4 ${KOKKOS_SETTINGS}
+clean:
 	cd ./01_thread_teams; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS}
 	cd ./01_thread_teams_lambda; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS}
 	cd ./02_nested_parallel_for; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS}
 	cd ./03_vectorization; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS}
 	cd ./04_team_scan; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS}
 
-clean:
+clean-insource:
 	cd ./01_thread_teams; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./01_thread_teams_lambda; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./02_nested_parallel_for; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./03_vectorization; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./04_team_scan; \
-	make clean
-
+	make clean ${KOKKOS_SETTINGS}
diff --git a/lib/kokkos/example/tutorial/Makefile b/lib/kokkos/example/tutorial/Makefile
index 300d98ab44340404b31dfb8690ce2a5577b55636..063ace8aabbe6017611ac17a54d12a47cb7e3196 100644
--- a/lib/kokkos/example/tutorial/Makefile
+++ b/lib/kokkos/example/tutorial/Makefile
@@ -1,144 +1,174 @@
-default:
+# Default KOKKOS_PATH to two levels above this Makefile's directory unless the caller already set it.
+ifndef KOKKOS_PATH
+  MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+  KOKKOS_PATH = $(dir $(MAKEFILE_PATH))../..
+endif
+
+ifndef KOKKOS_SETTINGS
+  KOKKOS_SETTINGS = "KOKKOS_PATH=${KOKKOS_PATH}"
+  ifdef KOKKOS_ARCH
+    KOKKOS_SETTINGS += "KOKKOS_ARCH=${KOKKOS_ARCH}"
+  endif
+  ifdef KOKKOS_DEVICES
+    KOKKOS_SETTINGS += "KOKKOS_DEVICES=${KOKKOS_DEVICES}"
+  endif
+  ifdef KOKKOS_OPTIONS
+    KOKKOS_SETTINGS += "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}"
+  endif
+  ifdef KOKKOS_CUDA_OPTIONS
+    KOKKOS_SETTINGS += "KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPTIONS}"
+  endif
+endif
+
+build:
+	mkdir -p 01_hello_world
 	cd ./01_hello_world; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 01_hello_world_lambda
 	cd ./01_hello_world_lambda; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 02_simple_reduce
 	cd ./02_simple_reduce; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 02_simple_reduce_lambda
 	cd ./02_simple_reduce_lambda; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 03_simple_view
 	cd ./03_simple_view; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 03_simple_view_lambda
 	cd ./03_simple_view_lambda; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 04_simple_memoryspaces
 	cd ./04_simple_memoryspaces; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p 05_simple_atomics
 	cd ./05_simple_atomics; \
-	make -j 4
+	make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS}
+	mkdir -p Advanced_Views
 	cd ./Advanced_Views; \
-	make -j 4
+	make build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
+	mkdir -p Algorithms
 	cd ./Algorithms; \
-	make -j 4
+	make build -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
+	mkdir -p Hierarchical_Parallelism
 	cd ./Hierarchical_Parallelism; \
-	make -j 4
+	make build -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 
-openmp:
+build-insource:
 	cd ./01_hello_world; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./01_hello_world_lambda; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce_lambda; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./03_simple_view; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./03_simple_view_lambda; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./04_simple_memoryspaces; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./05_simple_atomics; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build -j 4 ${KOKKOS_SETTINGS}
 	cd ./Advanced_Views; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Algorithms; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
+	make build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Hierarchical_Parallelism; \
-	make -j 4 KOKKOS_DEVICES=OpenMP
-
-pthreads:
+	make build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
+test:
 	cd ./01_hello_world; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS}
 	cd ./01_hello_world_lambda; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce_lambda; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS}
 	cd ./03_simple_view; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS}
 	cd ./03_simple_view_lambda; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS}
 	cd ./04_simple_memoryspaces; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS}
 	cd ./05_simple_atomics; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS}
 	cd ./Advanced_Views; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Algorithms; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Hierarchical_Parallelism; \
-	make -j 4 KOKKOS_DEVICES=Pthreads
+	make test -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 
-serial:
+test-insource:
 	cd ./01_hello_world; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./01_hello_world_lambda; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce_lambda; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./03_simple_view; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./03_simple_view_lambda; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./04_simple_memoryspaces; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./05_simple_atomics; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test -j 4 ${KOKKOS_SETTINGS}
 	cd ./Advanced_Views; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Algorithms; \
-	make -j 4 KOKKOS_DEVICES=Serial
+	make test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Hierarchical_Parallelism; \
-	make -j 4 KOKKOS_DEVICES=Serial
-
-cuda:
+	make test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
+clean:
 	cd ./01_hello_world; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS}
 	cd ./01_hello_world_lambda; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce_lambda; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS}
 	cd ./03_simple_view; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS}
 	cd ./03_simple_view_lambda; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS}
 	cd ./04_simple_memoryspaces; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS}
 	cd ./05_simple_atomics; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS}
 	cd ./Advanced_Views; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Algorithms; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Hierarchical_Parallelism; \
-	make -j 4 KOKKOS_DEVICES=Cuda,Serial
+	make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 
-clean:
+clean-insource:
 	cd ./01_hello_world; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./01_hello_world_lambda; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./02_simple_reduce_lambda; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./03_simple_view; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./03_simple_view_lambda; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./04_simple_memoryspaces; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./05_simple_atomics; \
-	make clean
+	make clean ${KOKKOS_SETTINGS}
 	cd ./Advanced_Views; \
-	make clean
+	make clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Algorithms; \
-	make clean
+	make clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
 	cd ./Hierarchical_Parallelism; \
-	make clean
-
+	make clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}'
diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash
index 86f136da96ed10e0a2f23c0cb2752eaaa5287d90..6fa03ebb34aa4d8253bcb81f0388a3bdceff2e26 100755
--- a/lib/kokkos/generate_makefile.bash
+++ b/lib/kokkos/generate_makefile.bash
@@ -63,6 +63,20 @@ case $key in
     ;;
     --compiler*)
     COMPILER="${key#*=}"
+    # Validate the --compiler value before it is written into the generated
+    # Makefiles.  The empty check runs first so `which` is never called
+    # without an argument; failures exit non-zero so callers can detect them.
+    if [[ -z "${COMPILER}" ]]; then
+      echo "Empty compiler specified by --compiler command."
+      exit 1
+    fi
+    # Count the lines `which` prints on stdout that contain the compiler name
+    # (fixed-string match, so names such as g++ are not treated as a regex).
+    CNUM=`which "${COMPILER}" 2>/dev/null | grep -F -c -- "${COMPILER}"`
+    if [ "${CNUM}" -eq 0 ]; then
+      echo "Invalid compiler by --compiler command: '${COMPILER}'"
+      exit 1
+    fi
     ;;
     --with-options*)
     KOKKOS_OPT="${key#*=}"
@@ -80,15 +94,22 @@ case $key in
     echo "--with-devices:                   explicitly add a set of backends"
     echo ""
     echo "--arch=[OPTIONS]:            set target architectures. Options are:"
-    echo "                               SNB = Intel Sandy/Ivy Bridge CPUs"
-    echo "                               HSW = Intel Haswell CPUs"
-    echo "                               KNC = Intel Knights Corner Xeon Phi"
-    echo "                               KNL = Intel Knights Landing Xeon Phi"
-    echo "                               Kepler30  = NVIDIA Kepler generation CC 3.0"
-    echo "                               Kepler35  = NVIDIA Kepler generation CC 3.5"
-    echo "                               Kepler37  = NVIDIA Kepler generation CC 3.7"
-    echo "                               Maxwell50 = NVIDIA Maxwell generation CC 5.0"
-    echo "                               Power8 = IBM Power 8 CPUs"
+    echo "                               ARMv80         = ARMv8.0 Compatible CPU"
+    echo "                               ARMv81         = ARMv8.1 Compatible CPU"
+    echo "                               ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU"
+    echo "                               SNB            = Intel Sandy/Ivy Bridge CPUs"
+    echo "                               HSW            = Intel Haswell CPUs"
+    echo "                               BDW            = Intel Broadwell Xeon E-class CPUs"
+    echo "                               SKX            = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)"
+    echo "                               KNC            = Intel Knights Corner Xeon Phi"
+    echo "                               KNL            = Intel Knights Landing Xeon Phi"
+    echo "                               Kepler30       = NVIDIA Kepler generation CC 3.0"
+    echo "                               Kepler35       = NVIDIA Kepler generation CC 3.5"
+    echo "                               Kepler37       = NVIDIA Kepler generation CC 3.7"
+    echo "                               Pascal60       = NVIDIA Pascal generation CC 6.0"
+    echo "                               Pascal61       = NVIDIA Pascal generation CC 6.1"
+    echo "                               Maxwell50      = NVIDIA Maxwell generation CC 5.0"
+    echo "                               Power8         = IBM POWER8 CPUs"
     echo ""
     echo "--compiler=/Path/To/Compiler set the compiler"
     echo "--debug,-dbg:                enable Debugging"
@@ -127,50 +148,65 @@ echo "Running generate_makefile.sh in the Kokkos root directory is not allowed"
 exit 
 fi
 
-KOKKOS_OPTIONS="KOKKOS_PATH=${KOKKOS_PATH}"
+KOKKOS_SRC_PATH=${KOKKOS_PATH}
+
+KOKKOS_SETTINGS="KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}"
+#KOKKOS_SETTINGS="KOKKOS_PATH=${KOKKOS_PATH}"
 
 if [ ${#COMPILER} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CXX=${COMPILER}"
-fi
-if [ ${#PREFIX} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} PREFIX=${PREFIX}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}"
 fi
 if [ ${#KOKKOS_DEVICES} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_DEVICES=${KOKKOS_DEVICES}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEVICES=${KOKKOS_DEVICES}"
 fi
 if [ ${#KOKKOS_ARCH} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_ARCH=${KOKKOS_ARCH}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_ARCH=${KOKKOS_ARCH}"
 fi
 if [ ${#KOKKOS_DEBUG} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_DEBUG=${KOKKOS_DEBUG}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEBUG=${KOKKOS_DEBUG}"
 fi
 if [ ${#CUDA_PATH} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CUDA_PATH=${CUDA_PATH}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CUDA_PATH=${CUDA_PATH}"
 fi
 if [ ${#CXXFLAGS} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CXXFLAGS=\"${CXXFLAGS}\""
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXXFLAGS=\"${CXXFLAGS}\""
 fi
 if [ ${#LDFLAGS} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} LDFLAGS=\"${LDFLAGS}\""
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} LDFLAGS=\"${LDFLAGS}\""
 fi
 if [ ${#GTEST_PATH} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} GTEST_PATH=${GTEST_PATH}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}"
 else
 GTEST_PATH=${KOKKOS_PATH}/tpls/gtest
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} GTEST_PATH=${GTEST_PATH}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}"
 fi
 if [ ${#HWLOC_PATH} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc"
 fi
 if [ ${#QTHREAD_PATH} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} QTHREAD_PATH=${QTHREAD_PATH}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} QTHREAD_PATH=${QTHREAD_PATH}"
 fi
 if [ ${#KOKKOS_OPT} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_OPTIONS=${KOKKOS_OPT}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_OPTIONS=${KOKKOS_OPT}"
 fi
 if [ ${#KOKKOS_CUDA_OPT} -gt 0 ]; then
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}"
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}"
+fi
+
+KOKKOS_SETTINGS_NO_KOKKOS_PATH="${KOKKOS_SETTINGS}"
+
+KOKKOS_TEST_INSTALL_PATH="${PWD}/install"
+if [ ${#PREFIX} -gt 0 ]; then
+KOKKOS_INSTALL_PATH="${PREFIX}"
+else
+KOKKOS_INSTALL_PATH=${KOKKOS_TEST_INSTALL_PATH}
 fi
+
+
+mkdir install  # stub install tree; the Makefile.kokkos written below only defines an empty kokkos-clean target
+echo "#Makefile to satisfy existence of target kokkos-clean before installing the library" > install/Makefile.kokkos
+echo "kokkos-clean:" >> install/Makefile.kokkos
+echo "" >> install/Makefile.kokkos
 mkdir core
 mkdir core/unit_test
 mkdir core/perf_test
@@ -184,126 +220,153 @@ mkdir example
 mkdir example/fixture
 mkdir example/feint
 mkdir example/fenl
+mkdir example/tutorial
 
 if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then
 mkdir example/ichol
 fi
 
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}"
+
 # Generate subdirectory makefiles.
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > core/unit_test/Makefile
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/unit_test/Makefile
 echo "" >> core/unit_test/Makefile
 echo "all:" >> core/unit_test/Makefile
-echo -e "\tmake -j -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS}" >> core/unit_test/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS}" >> core/unit_test/Makefile
 echo "" >> core/unit_test/Makefile
 echo "test: all" >> core/unit_test/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS} test" >> core/unit_test/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} test" >> core/unit_test/Makefile
 echo "" >> core/unit_test/Makefile
 echo "clean:" >> core/unit_test/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS} clean" >> core/unit_test/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/unit_test/Makefile
 
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > core/perf_test/Makefile
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/perf_test/Makefile
 echo "" >> core/perf_test/Makefile
 echo "all:" >> core/perf_test/Makefile
-echo -e "\tmake -j -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS}" >> core/perf_test/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS}" >> core/perf_test/Makefile
 echo "" >> core/perf_test/Makefile
 echo "test: all" >> core/perf_test/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS} test" >> core/perf_test/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} test" >> core/perf_test/Makefile
 echo "" >> core/perf_test/Makefile
 echo "clean:" >> core/perf_test/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS} clean" >> core/perf_test/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/perf_test/Makefile
 
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > containers/unit_tests/Makefile
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/unit_tests/Makefile
 echo "" >> containers/unit_tests/Makefile
 echo "all:" >> containers/unit_tests/Makefile
-echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS}" >> containers/unit_tests/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/unit_tests/Makefile
 echo "" >> containers/unit_tests/Makefile
 echo "test: all" >> containers/unit_tests/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS} test" >> containers/unit_tests/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/unit_tests/Makefile
 echo "" >> containers/unit_tests/Makefile
 echo "clean:" >> containers/unit_tests/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS} clean" >> containers/unit_tests/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/unit_tests/Makefile
 
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > containers/performance_tests/Makefile
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/performance_tests/Makefile
 echo "" >> containers/performance_tests/Makefile
 echo "all:" >> containers/performance_tests/Makefile
-echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS}" >> containers/performance_tests/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/performance_tests/Makefile
 echo "" >> containers/performance_tests/Makefile
 echo "test: all" >> containers/performance_tests/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS} test" >> containers/performance_tests/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/performance_tests/Makefile
 echo "" >> containers/performance_tests/Makefile
 echo "clean:" >> containers/performance_tests/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS} clean" >> containers/performance_tests/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/performance_tests/Makefile
 
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > algorithms/unit_tests/Makefile
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > algorithms/unit_tests/Makefile
 echo "" >> algorithms/unit_tests/Makefile
 echo "all:" >> algorithms/unit_tests/Makefile
-echo -e "\tmake -j -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS}" >> algorithms/unit_tests/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> algorithms/unit_tests/Makefile
 echo "" >> algorithms/unit_tests/Makefile
 echo "test: all" >> algorithms/unit_tests/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS} test" >> algorithms/unit_tests/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> algorithms/unit_tests/Makefile
 echo "" >> algorithms/unit_tests/Makefile
 echo "clean:" >> algorithms/unit_tests/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS} clean" >> algorithms/unit_tests/Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> algorithms/unit_tests/Makefile
+
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_TEST_INSTALL_PATH}"
 
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/fixture/Makefile
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/fixture/Makefile
 echo "" >> example/fixture/Makefile
 echo "all:" >> example/fixture/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS}" >> example/fixture/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS}" >> example/fixture/Makefile
 echo "" >> example/fixture/Makefile
 echo "test: all" >> example/fixture/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS} test" >> example/fixture/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} test" >> example/fixture/Makefile
 echo "" >> example/fixture/Makefile
 echo "clean:" >> example/fixture/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS} clean" >> example/fixture/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} clean" >> example/fixture/Makefile
 
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/feint/Makefile
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/feint/Makefile
 echo "" >> example/feint/Makefile
 echo "all:" >> example/feint/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS}" >> example/feint/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS}" >> example/feint/Makefile
 echo "" >> example/feint/Makefile
 echo "test: all" >> example/feint/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS} test" >> example/feint/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} test" >> example/feint/Makefile
 echo "" >> example/feint/Makefile
 echo "clean:" >> example/feint/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS} clean" >> example/feint/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} clean" >> example/feint/Makefile
 
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/fenl/Makefile
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/fenl/Makefile
 echo "" >> example/fenl/Makefile
 echo "all:" >> example/fenl/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS}" >> example/fenl/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS}" >> example/fenl/Makefile
 echo "" >> example/fenl/Makefile
 echo "test: all" >> example/fenl/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS} test" >> example/fenl/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} test" >> example/fenl/Makefile
 echo "" >> example/fenl/Makefile
 echo "clean:" >> example/fenl/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS} clean" >> example/fenl/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} clean" >> example/fenl/Makefile
+
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/tutorial/Makefile
+echo "" >> example/tutorial/Makefile
+echo "build:" >> example/tutorial/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} build">> example/tutorial/Makefile
+echo "" >> example/tutorial/Makefile
+echo "test: build" >> example/tutorial/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} test" >> example/tutorial/Makefile
+echo "" >> example/tutorial/Makefile
+echo "clean:" >> example/tutorial/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} clean" >> example/tutorial/Makefile
+
 
 if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/ichol/Makefile
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/ichol/Makefile
 echo "" >> example/ichol/Makefile
 echo "all:" >> example/ichol/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_OPTIONS}" >> example/ichol/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS}" >> example/ichol/Makefile
 echo "" >> example/ichol/Makefile
 echo "test: all" >> example/ichol/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_OPTIONS} test" >> example/ichol/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS} test" >> example/ichol/Makefile
 echo "" >> example/ichol/Makefile
 echo "clean:" >> example/ichol/Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_OPTIONS} clean" >> example/ichol/Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS} clean" >> example/ichol/Makefile
 fi
 
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}"
+
 # Generate top level directory makefile.
-echo "Generating Makefiles with options " ${KOKKOS_OPTIONS}
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > Makefile
+echo "Generating Makefiles with options " ${KOKKOS_SETTINGS}
+echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > Makefile
 echo "" >> Makefile
-echo "lib:" >> Makefile
+echo "kokkoslib:" >> Makefile
 echo -e "\tcd core; \\" >> Makefile
-echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS}" >> Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_INSTALL_PATH} build-lib" >> Makefile
 echo "" >> Makefile
-echo "install: lib" >> Makefile
+echo "install: kokkoslib" >> Makefile
 echo -e "\tcd core; \\" >> Makefile
-echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS} install" >> Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_INSTALL_PATH} install" >> Makefile
 echo "" >> Makefile
-echo "build-test:" >> Makefile
+echo "kokkoslib-test:" >> Makefile
+echo -e "\tcd core; \\" >> Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_TEST_INSTALL_PATH} build-lib" >> Makefile
+echo "" >> Makefile
+echo "install-test: kokkoslib-test" >> Makefile
+echo -e "\tcd core; \\" >> Makefile
+echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_TEST_INSTALL_PATH} install" >> Makefile
+echo "" >> Makefile
+echo "build-test: install-test" >> Makefile
 echo -e "\tmake -C core/unit_test" >> Makefile
 echo -e "\tmake -C core/perf_test" >> Makefile
 echo -e "\tmake -C containers/unit_tests" >> Makefile
@@ -312,6 +375,7 @@ echo -e "\tmake -C algorithms/unit_tests" >> Makefile
 echo -e "\tmake -C example/fixture" >> Makefile
 echo -e "\tmake -C example/feint" >> Makefile
 echo -e "\tmake -C example/fenl" >> Makefile
+echo -e "\tmake -C example/tutorial build" >> Makefile
 echo "" >> Makefile
 echo "test: build-test" >> Makefile
 echo -e "\tmake -C core/unit_test test" >> Makefile
@@ -322,6 +386,12 @@ echo -e "\tmake -C algorithms/unit_tests test" >> Makefile
 echo -e "\tmake -C example/fixture test" >> Makefile
 echo -e "\tmake -C example/feint test" >> Makefile
 echo -e "\tmake -C example/fenl test" >> Makefile
+echo -e "\tmake -C example/tutorial test" >> Makefile
+echo "" >> Makefile
+echo "unit-tests-only:" >> Makefile
+echo -e "\tmake -C core/unit_test test" >> Makefile
+echo -e "\tmake -C containers/unit_tests test" >> Makefile
+echo -e "\tmake -C algorithms/unit_tests test" >> Makefile
 echo "" >> Makefile
 echo "clean:" >> Makefile
 echo -e "\tmake -C core/unit_test clean" >> Makefile
@@ -332,5 +402,6 @@ echo -e "\tmake -C algorithms/unit_tests clean" >> Makefile
 echo -e "\tmake -C example/fixture clean" >> Makefile
 echo -e "\tmake -C example/feint clean" >> Makefile
 echo -e "\tmake -C example/fenl clean" >> Makefile
+echo -e "\tmake -C example/tutorial clean" >> Makefile
 echo -e "\tcd core; \\" >> Makefile
-echo -e "\tmake -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS} clean" >> Makefile
+echo -e "\tmake -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} clean" >> Makefile
diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
index cc096058ecc1d368b5d645bffac718648388ac92..5b53b8ed05d5fb6fbe8a6b1a1001a70f83bfe726 100644
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@@ -915,6 +915,14 @@ void memset_kokkos (ViewType &view) {
   ViewType::execution_space::fence();
 }
 
+struct params_lj_coul {  // parameter record (cutoffs, lj1-lj4, offset); both constructors zero every field
+  KOKKOS_INLINE_FUNCTION
+  params_lj_coul() : cut_ljsq(0), cut_coulsq(0), lj1(0), lj2(0), lj3(0), lj4(0), offset(0) {}
+  KOKKOS_INLINE_FUNCTION  // the int argument is accepted but ignored
+  params_lj_coul(int /*i*/) : cut_ljsq(0), cut_coulsq(0), lj1(0), lj2(0), lj3(0), lj4(0), offset(0) {}
+  F_FLOAT cut_ljsq, cut_coulsq, lj1, lj2, lj3, lj4, offset;
+};
+
 #if defined(KOKKOS_HAVE_CXX11)
 #undef ISFINITE
 #define ISFINITE(x) std::isfinite(x)
diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h
index 3c0b7d46aea065fb877723ed04f3e4ef9fb7e05e..048a7dab60cc343d1e30ec2315dfba60855d3a73 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h
@@ -44,11 +44,6 @@ class PairLJCharmmCoulCharmmImplicitKokkos : public PairLJCharmmCoulCharmmImplic
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
-  };
 
  protected:
   void cleanup_copy();
diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h
index 202cda68b31d4fc3dd90f75de2cab0dfcffbfafd..db0b14a84ec368ea0ae4832c39d83a931d18809b 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h
@@ -44,11 +44,6 @@ class PairLJCharmmCoulCharmmKokkos : public PairLJCharmmCoulCharmm {
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
-  };
 
  protected:
   void cleanup_copy();
diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h
index fcdab7ddb626971587f47cafae79214ca6563cdf..0969d11b0ec3f8460a05fb1929a81f33f6b37d1d 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h
@@ -43,12 +43,6 @@ class PairLJCharmmCoulLongKokkos : public PairLJCharmmCoulLong {
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
-  };
-
  protected:
   void cleanup_copy();
 
diff --git a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h
index 1ea5bc69bf4c9032af67573b80d6a980891f7750..c3492666dece0f786014fe0990a9004773578315 100644
--- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h
@@ -43,11 +43,6 @@ class PairLJClass2CoulCutKokkos : public PairLJClass2CoulCut {
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
-  };
 
  protected:
   void cleanup_copy();
diff --git a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h
index 0b1b2dc90b64cc18164eb1d4e4ccfab50773fb48..c5c46ed2d52c31db795f883622165117a7745f5e 100644
--- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h
@@ -44,12 +44,6 @@ class PairLJClass2CoulLongKokkos : public PairLJClass2CoulLong {
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
-  };
-
  protected:
   void cleanup_copy();
 
diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
index 36f31d176c0365640c0bddd31f25a180c51ac8ba..5891371d140285e1e8f6fe0a5697b15cd263dc80 100644
--- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
@@ -43,14 +43,6 @@ class PairLJCutCoulCutKokkos : public PairLJCutCoulCut {
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    KOKKOS_INLINE_FUNCTION
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    KOKKOS_INLINE_FUNCTION
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
-  };
-
  protected:
   void cleanup_copy();
 
diff --git a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h
index 9e1e30abaddf6f43ad575921228c9d8e61ac9918..d507f76a3a94adaedfcc6faa70217c5f42097d16 100644
--- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h
@@ -43,12 +43,6 @@ class PairLJCutCoulDebyeKokkos : public PairLJCutCoulDebye {
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
-  };
-
  protected:
   void cleanup_copy();
 
diff --git a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h
index b1f578ec0bf109d1b4fc435410d574b3cdd1ebd3..3e378757c0f7c01d9dc15558b6381c9a482878f1 100644
--- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h
@@ -42,12 +42,6 @@ class PairLJCutCoulDSFKokkos : public PairLJCutCoulDSF {
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
-  };
-
  protected:
   void cleanup_copy();
 
diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
index 5bdaaf96ca60f625ee93e4ae1531c2e98dc61e65..73283292323e967950394c0ffaef29b630aa4539 100644
--- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
@@ -44,12 +44,6 @@ class PairLJCutCoulLongKokkos : public PairLJCutCoulLong {
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
-    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
-  };
-
  protected:
   void cleanup_copy();
 
diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp
index 499a826676b06137ec5630c0f218d786d3d2b77f..b636f36499a8ffd5c765615634af49218acb448b 100644
--- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp
@@ -286,7 +286,7 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::allocate()
   memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq");
   d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
 
-  k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJGromacsCoulGromacs::params",n+1,n+1);
+  k_params = Kokkos::DualView<params_lj_coul_gromacs**,Kokkos::LayoutRight,DeviceType>("PairLJGromacsCoulGromacs::params",n+1,n+1);
   params = k_params.d_view;
 }
 
diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h
index 8b10eb71a36f6dee7a3dbca271bee661244db386..bbf5c50a624148060206a4ebff87e3e8a2b466a3 100644
--- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h
+++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h
@@ -44,9 +44,11 @@ class PairLJGromacsCoulGromacsKokkos : public PairLJGromacsCoulGromacs {
   void init_style();
   double init_one(int, int);
 
-  struct params_lj_coul{
-    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;ljsw1=0;ljsw2=0;ljsw3=0;ljsw4=0;ljsw5=0;};
-    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;ljsw1=0;ljsw2=0;ljsw3=0;ljsw4=0;ljsw5=0;};
+  struct params_lj_coul_gromacs{  // same leading fields as params_lj_coul, plus ljsw1..ljsw5
+    KOKKOS_INLINE_FUNCTION
+    params_lj_coul_gromacs() : cut_ljsq(0),cut_coulsq(0),lj1(0),lj2(0),lj3(0),lj4(0),offset(0),ljsw1(0),ljsw2(0),ljsw3(0),ljsw4(0),ljsw5(0) {}
+    KOKKOS_INLINE_FUNCTION  // the int argument is ignored: every field is zeroed exactly as in the default constructor
+    params_lj_coul_gromacs(int) : cut_ljsq(0),cut_coulsq(0),lj1(0),lj2(0),lj3(0),lj4(0),offset(0),ljsw1(0),ljsw2(0),ljsw3(0),ljsw4(0),ljsw5(0) {}
     F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset,ljsw1,ljsw2,ljsw3,ljsw4,ljsw5;
   };
 
@@ -73,11 +75,11 @@ class PairLJGromacsCoulGromacsKokkos : public PairLJGromacsCoulGromacs {
   F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
                         const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
 
-  Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
-  typename Kokkos::DualView<params_lj_coul**,
+  Kokkos::DualView<params_lj_coul_gromacs**,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_lj_coul_gromacs**,
     Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
   // hardwired to space for 12 atom types
-  params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  params_lj_coul_gromacs m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
 
   F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
   F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
diff --git a/src/MAKE/MACHINES/Makefile.white b/src/MAKE/MACHINES/Makefile.white
index ae31664b0dde69d357aae9ce826421cf14a30fa4..53de76e7362b252aea3404bbad66562ab70f9f6d 100644
--- a/src/MAKE/MACHINES/Makefile.white
+++ b/src/MAKE/MACHINES/Makefile.white
@@ -23,7 +23,6 @@ ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 KOKKOS_DEVICES = Cuda, OpenMP
 KOKKOS_ARCH =   Kepler35
-KOKKOS_CUDA_OPTIONS = enable_lambda
 
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings, all OPTIONAL
diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpich b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpich
index efdc728bdf01ae6ec6fa5853053bb25fb8def07f..be0c2d1913bc1aef371ee89d040368578dd2c170 100644
--- a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpich
+++ b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpich
@@ -7,13 +7,14 @@ SHELL = /bin/sh
 # specify flags and libraries needed for your compiler
 
 KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
-CC =		mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
+export MPICH_CXX = $(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
+CC =		mpicxx
 CCFLAGS =	-g -O3
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
-LINK =		mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
-LINKFLAGS =	-g -O
+LINK =		mpicxx
+LINKFLAGS =	-g -O3
 LIB = 
 SIZE =		size