diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md
index 3fe9e46111b9f858d84e4579c8b026ae37dda472..43d3f17d634eacb38cf1f017afb9fea5706ac7cb 100644
--- a/lib/kokkos/CHANGELOG.md
+++ b/lib/kokkos/CHANGELOG.md
@@ -1,5 +1,22 @@
 # Change Log
 
+## [2.04.00](https://github.com/kokkos/kokkos/tree/2.04.00) (2017-08-16)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.13...2.04.00)
+
+**Implemented enhancements:**
+
+- Added ROCm backend to support AMD GPUs
+- Kokkos::complex\<T\> behaves slightly differently from std::complex\<T\> [\#1011](https://github.com/kokkos/kokkos/issues/1011)
+- Kokkos::Experimental::Crs constructor arguments were in the wrong order [\#992](https://github.com/kokkos/kokkos/issues/992)
+- Work graph construction ease-of-use (one lambda for count and fill) [\#991](https://github.com/kokkos/kokkos/issues/991)
+- when\_all returns pointer of futures (improved interface) [\#990](https://github.com/kokkos/kokkos/issues/990)
+- Allow assignment of LayoutLeft to LayoutRight or vice versa for rank-0 Views [\#594](https://github.com/kokkos/kokkos/issues/594)
+- Changed the meaning of Kokkos\_ENABLE\_CXX11\_DISPATCH\_LAMBDA [\#1035](https://github.com/kokkos/kokkos/issues/1035)
+
+**Fixed bugs:**
+
+- memory pool default constructor does not properly set member variables. [\#1007](https://github.com/kokkos/kokkos/issues/1007)
+
 ## [2.03.13](https://github.com/kokkos/kokkos/tree/2.03.13) (2017-07-27)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.05...2.03.13)
 
diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos
index d2967cf9a3fe51d5335e50969f31368e71cbad07..b8236e8fd11d4a2a1b8123d16d9287697192bdad 100644
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@@ -4,10 +4,16 @@
 KOKKOS_PATH=../../lib/kokkos
 CXXFLAGS=$(CCFLAGS)
 
-# Options: Cuda,OpenMP,Pthreads,Qthreads,Serial
+# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
 KOKKOS_DEVICES ?= "OpenMP"
 #KOKKOS_DEVICES ?= "Pthreads"
-# Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX
+# Options: 
+# Intel:    KNC,KNL,SNB,HSW,BDW,SKX
+# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61
+# ARM:      ARMv80,ARMv81,ARMv8-ThunderX
+# IBM:      BGQ,Power7,Power8,Power9
+# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
+# AMD-CPUS: AMDAVX,Ryzen,Epyc
 KOKKOS_ARCH ?= ""
 # Options: yes,no
 KOKKOS_DEBUG ?= "no"
@@ -43,8 +49,8 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | gr
 KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
 KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
 
+
 # Check for Kokkos Host Execution Spaces one of which must be on.
-KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
 KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l))
 KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
 KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l))
@@ -60,6 +66,8 @@ endif
 
 # Check for other Execution Spaces.
 KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
+KOKKOS_INTERNAL_USE_ROCM := $(strip $(shell echo $(KOKKOS_DEVICES) | grep ROCm | wc -l))
+KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
 
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
   KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
@@ -87,6 +95,7 @@ ifneq ($(MPICH_CXX),)
 endif
 KOKKOS_INTERNAL_COMPILER_CLANG       := $(strip $(shell $(CXX) --version       2>&1 | grep clang               | wc -l))
 KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version       2>&1 | grep "apple-darwin"      | wc -l))
+KOKKOS_INTERNAL_COMPILER_HCC         := $(strip $(shell $(CXX) --version       2>&1 | grep HCC                 | wc -l))
 
 ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
   KOKKOS_INTERNAL_COMPILER_CLANG = 1
@@ -99,6 +108,10 @@ endif
 ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
   KOKKOS_INTERNAL_COMPILER_CLANG = 0
 endif
+# AMD HCC matches both the "clang" and the "HCC" --version checks above, so clear the clang flag.
+ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
+  KOKKOS_INTERNAL_COMPILER_CLANG = 0
+endif
 
 ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
   KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')
@@ -183,8 +196,12 @@ else
     ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
       KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
     else
-      KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
-      KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
+      ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
+        KOKKOS_INTERNAL_CXX11_FLAG := 
+      else
+        KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
+        KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
+      endif
     endif
   endif
 endif
@@ -259,6 +276,13 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
 
 # AMD based.
 KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(strip $(shell echo $(KOKKOS_ARCH) | grep Ryzen | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_EPYC := $(strip $(shell echo $(KOKKOS_ARCH) | grep Epyc | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kaveri | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(strip $(shell echo $(KOKKOS_ARCH) | grep Carrizo | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_FIJI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Fiji | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_VEGA := $(strip $(shell echo $(KOKKOS_ARCH) | grep Vega | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(strip $(shell echo $(KOKKOS_ARCH) | grep gfx901 | wc -l))
 
 # Any AVX?
 KOKKOS_INTERNAL_USE_ARCH_SSE42      := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc ))
@@ -271,6 +295,7 @@ KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_US
 KOKKOS_INTERNAL_USE_ISA_X86_64    := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
 KOKKOS_INTERNAL_USE_ISA_KNC       := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
 KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
+KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER7) | bc ))
 
 # Decide whether we can support transactional memory
 KOKKOS_INTERNAL_USE_TM            := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
@@ -319,8 +344,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
   tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
+  tmp := $(shell echo '\#define KOKKOS_ENABLE_ROCM 1' >> KokkosCore_config.tmp)
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
-        tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
+  tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@@ -363,6 +392,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
   tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
+  tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
+  tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCBE" >> KokkosCore_config.tmp )
+  tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
+endif
+
 tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
   KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
@@ -561,6 +596,18 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
   endif
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
+  tmp := $(shell echo "\#define KOKKOS_ARCH_POWER7 1" >> KokkosCore_config.tmp )
+
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+    # PGI: no architecture-specific flags are added.
+  else
+    # Assume that this is really a GNU compiler, or it could be XL on POWER7 (TODO confirm XL accepts these flags).
+    KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7
+    KOKKOS_LDFLAGS  += -mcpu=power7 -mtune=power7
+  endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
   tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
 
@@ -742,7 +789,49 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
   endif
 endif
 
+# Figure out the architecture flag for ROCm.
+ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
+  # Let's start with adding architecture defines; the last matching block below sets the target flag.
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 701" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_KAVERI 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 801" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_CARRIZO 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 803" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_FIJI 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 900" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_VEGA 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 901" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_GFX901 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901
+  endif
+
+  # Derive the HCC install prefix from the bin/ directory holding $(CXX), whatever the compiler binary is named.
+  KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX))
+  ROCM_HCC_PATH ?= $(patsubst %/bin/,%,$(dir $(KOKKOS_INTERNAL_HCC_PATH)))
+
+  KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags)
+  KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
+  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)
+
+  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
+  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
+endif
+
 KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
+
 ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
   KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
 else
diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets
index a9341a907c15009eb270c1b44bb4074f8f1f8cb5..964ec966d5ab458b5c09b647bb9040d72f937921 100644
--- a/lib/kokkos/Makefile.targets
+++ b/lib/kokkos/Makefile.targets
@@ -42,6 +42,17 @@ Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_C
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
+Kokkos_ROCm_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
+Kokkos_ROCm_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Space.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Space.cpp
+Kokkos_ROCm_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
+Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
 Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
diff --git a/lib/kokkos/README b/lib/kokkos/README
index 257a2e5db475dea8c89f1468c42432614c909762..e5ed39ef53dc25804f8663f12f9da2a92cb02e29 100644
--- a/lib/kokkos/README
+++ b/lib/kokkos/README
@@ -80,6 +80,9 @@ Other compilers working:
   X86:
    Cygwin 2.1.0 64bit with gcc 4.9.3
 
+Limited testing of the following compilers on POWER7+ systems:
+  GCC 4.8.5 (on RHEL7.1 POWER7+)
+
 Known non-working combinations:
   Power8:
    Pthreads backend
@@ -171,3 +174,22 @@ Contributions to Kokkos are welcome. In order to do so, please open an issue
 where a feature request or bug can be discussed. Then issue a pull request
 with your contribution. Pull requests must be issued against the develop branch. 
 
+===========================================================================
+====Citing Kokkos==========================================================
+===========================================================================
+
+If you publish work which mentions Kokkos, please cite the following paper:
+
+@article{CarterEdwards20143202,
+title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
+journal = "Journal of Parallel and Distributed Computing ",
+volume = "74",
+number = "12",
+pages = "3202 - 3216",
+year = "2014",
+note = "Domain-Specific Languages and High-Level Frameworks for High-Performance Computing ",
+issn = "0743-7315",
+doi = "https://doi.org/10.1016/j.jpdc.2014.07.003",
+url = "http://www.sciencedirect.com/science/article/pii/S0743731514001257",
+author = "H. Carter Edwards and Christian R. Trott and Daniel Sunderland"
+}
diff --git a/lib/kokkos/config/kokkos-promotion.txt b/lib/kokkos/config/kokkos-promotion.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d56298b4163c9506ed0b4a551b220752de8449b2
--- /dev/null
+++ b/lib/kokkos/config/kokkos-promotion.txt
@@ -0,0 +1,140 @@
+Summary:
+
+- Step 1: Testing Kokkos itself using test_all_sandia
+
+- Step 2: Testing of Kokkos integrated into Trilinos (config/trilinos-integration/*.sh)
+
+- Step 3: Locally update CHANGELOG, merge into master, edit config/master_history.txt
+
+- Step 4: Locally snapshot new master into corresponding Trilinos branch (develop or temporary), push with checkin-test-sems.sh
+
+- Step 5: Push local Kokkos master to GitHub (need Owner approval)
+
+Steps 1, 2, and 4 include testing that may fail. These failures must be fixed either by pull requests to Kokkos develop, or by creating a new Trilinos branch for parts of Trilinos that must be updated. This is what usually takes the most time.
+
+
+// -------------------------------------------------------------------------------- //
+
+
+Step 1: The following should be repeated on enough machines to cover all
+supported compilers. Those machines are:
+
+    kokkos-dev
+    ??? <- TODO: identify other machines
+
+  1.1. Clone kokkos develop branch (or just switch to it)
+
+         git clone -b develop git@github.com:kokkos/kokkos.git
+         cd kokkos
+
+  1.2. Create a testing directory
+
+         mkdir testing
+         cd testing
+
+  1.3. Run the test_all_sandia script with no options to test all compilers
+
+         nohup ../config/test_all_sandia &
+         tail -f nohup.out                   # to watch progress
+
+// -------------------------------------------------------------------------------- //
+
+Step 2:
+  2.1. Build and test Trilinos with 4 different configurations; Run scripts for white and shepard that are provided in kokkos/config/trilinos-integration. These scripts load their own modules/environment, so don't require preparation. You can run all four at the same time, use separate directories for each.
+
+         mkdir serial
+         cd serial
+         nohup KOKKOS_PATH/config/trilinos-integration/shepard_jenkins_run_script_serial_intel &
+
+  2.2. Compare the compile errors and test failures between updated and pristine versions. There may be compile failures that happen in both, tests that fail in both, and there may be tests that only fail sometimes (thus, rerun tests manually as needed).
+
+// -------------------------------------------------------------------------------- //
+
+Step 3: This step should be run on kokkos-dev
+
+  3.1. If you don't have a GitHub token already, generate one for yourself (this will give you TOKEN):
+
+       https://github.com/settings/tokens
+
+  3.2. Get a clean copy of the Kokkos develop branch
+
+       git clone -b develop git@github.com:kokkos/kokkos.git
+       cd kokkos
+
+  3.3. Generate the initial changelog. Use the most recent tag as OLDTAG (`git tag -l` can show you all tags). The NEWTAG is the new version number, e.g. "2.04.00". RUN THIS OUTSIDE THE KOKKOS SOURCE TREE!
+
+       module load ruby/2.3.1/gcc/5.3.0
+       github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG'
+       cat CHANGELOG.md
+
+  3.4. Manually clean up and commit the change log. Pushing to develop requires Owner permission.
+       (Copy the new section from the generated CHANGELOG.md to KOKKOS_PATH/CHANGELOG.md)
+       (Make desired changes to CHANGELOG.md to enhance clarity (remove issues not noteworthy))
+       (Commit and push the CHANGELOG.md to develop)
+
+  3.5. Merge develop into master. DO NOT FAST-FORWARD THE MERGE!!!!
+
+       (From kokkos directory):
+       git checkout master
+       git merge --no-ff origin/develop
+
+  3.6. Update the tag in kokkos/config/master_history.txt
+
+       Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
+       Tag field widths: #.#.##
+       date description: month:day:year
+       date field widths: ##:##:####
+       master description: SHA1 of previous master commit (use `git log`?)
+       develop description: SHA1 of merged develop branch
+       SHA1 field width: ######## (8 chars)
+
+       # Append to config/master_history.txt:
+
+       tag:  2.03.13    date: 07:27:2017    master: da314444    develop: 29ccb58a
+       
+       git commit --amend -a
+
+
+  3.7. Create the new tag:
+
+       git tag -a #.#.##
+
+         (type the following into the tag message (same as for step 4.3))
+         tag: #.#.##
+         date: mm/dd/yyyy
+         master: sha1
+         develop: sha1
+
+  3.8. DO NOT PUSH YET !!!
+
+
+// -------------------------------------------------------------------------------- //
+
+Step 4: This step can be done on any SEMS machine (e.g. kokkos-dev). Actually, the checkin step requires lots of disk space and RAM. Use ceerws1113 if you have access to it.
+
+  4.1 Clone the Trilinos corresponding branch (or just switch to it)
+
+        git clone -b develop git@github.com:trilinos/Trilinos.git
+        TRILINOS_PATH=$PWD/Trilinos
+
+  4.2 Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees.
+
+        module load sems-python/2.7.9
+        python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages
+
+  4.3. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3)
+
+       cd TRILINOS_PATH
+       mkdir CHECKIN
+       cd CHECKIN
+       nohup ../cmake/std/sems/checkin-test-sems.sh --do-all --push &
+
+  4.4. If there are failures, fix and backtrack. Otherwise, go to next step
+
+// -------------------------------------------------------------------------------- //
+
+Step 5: Push Kokkos master to GitHub (requires Owner permission).
+      
+       cd KOKKOS_PATH
+       git push --follow-tags origin master 
+
diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt
index 0447db4b2b7bf7638de2ab89d082b4faa6ba2bfe..96b05c02e1fae8d1c4cb9cb914b24786f7b4a1c9 100644
--- a/lib/kokkos/config/master_history.txt
+++ b/lib/kokkos/config/master_history.txt
@@ -8,3 +8,4 @@ tag:  2.02.15    date: 02:10:2017    master: 8c64cd93    develop: 28dea8b6
 tag:  2.03.00    date: 04:25:2017    master: 120d9ce7    develop: 015ba641
 tag:  2.03.05    date: 05:27:2017    master: 36b92f43    develop: 79073186
 tag:  2.03.13    date: 07:27:2017    master: da314444    develop: 29ccb58a
+tag:  2.04.00    date: 08:16:2017    master: 54eb75c0    develop: 32fb8ee1
diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia
index 005cd2072132cf50e7d73fa92767a9df5956a0db..e6fcaad2614dc6b19e23b895ebb237ca83d350c5 100755
--- a/lib/kokkos/config/test_all_sandia
+++ b/lib/kokkos/config/test_all_sandia
@@ -167,7 +167,6 @@ if [ "$MACHINE" = "sems" ]; then
                "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
                "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
                "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
diff --git a/lib/kokkos/core/cmake/KokkosCore_config.h.in b/lib/kokkos/core/cmake/KokkosCore_config.h.in
index 621cd54e1c84d954bd32bf69cbe7feeeba77947d..599c6b022409f1446cda3b15b985f080171e57b4 100644
--- a/lib/kokkos/core/cmake/KokkosCore_config.h.in
+++ b/lib/kokkos/core/cmake/KokkosCore_config.h.in
@@ -1,15 +1,15 @@
-#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
-#error "Don't include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
-#else
-#define KOKKOS_CORE_CONFIG_H
-#endif
-
 /* The trivial 'src/build_common.sh' creates a config
  * that must stay in sync with this file.
  */
 #cmakedefine KOKKOS_FOR_SIERRA
 
-#ifndef KOKKOS_FOR_SIERRA
+#if !defined(KOKKOS_FOR_SIERRA)
+
+#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
+#error "Don't include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
+#else
+#define KOKKOS_CORE_CONFIG_H
+#endif
 
 #cmakedefine KOKKOS_HAVE_CUDA
 #cmakedefine KOKKOS_HAVE_OPENMP
@@ -93,12 +93,6 @@
 #cmakedefine KOKKOS_ARCH_PASCAL60 1
 #cmakedefine KOKKOS_ARCH_PASCAL61 1
 
-// Don't forbid users from defining this macro on the command line,
-// but still make sure that CMake logic can control its definition.
-#ifndef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
-#cmakedefine KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
-#endif
-
 // TODO: These are currently not used in Kokkos.  Should they be removed?
 #cmakedefine KOKKOS_HAVE_MPI
 #cmakedefine KOKKOS_HAVE_CUSPARSE
@@ -107,4 +101,4 @@
 #cmakedefine KOKKOS_USING_DEPRECATED_VIEW
 #cmakedefine KOKKOS_HAVE_CXX11
 
-#endif // KOKKOS_FOR_SIERRA
+#endif // !defined(KOKKOS_FOR_SIERRA)
diff --git a/lib/kokkos/core/src/CMakeLists.txt b/lib/kokkos/core/src/CMakeLists.txt
index 492470d05d07ee5684a04bff54fc103e82708ba9..0d5d97a829488090d8ecd721e13301b494196557 100644
--- a/lib/kokkos/core/src/CMakeLists.txt
+++ b/lib/kokkos/core/src/CMakeLists.txt
@@ -9,30 +9,6 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
 ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11)
 ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA)
 
-# Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA governs whether Kokkos allows
-# use of lambdas at the outer level of parallel dispatch (that is, as
-# the argument to an outer parallel_for, parallel_reduce, or
-# parallel_scan).  This works with non-CUDA execution spaces if C++11
-# is enabled.  It does not currently work with public releases of
-# CUDA.  If that changes, please change the default here to ON if CUDA
-# and C++11 are ON.
-IF (${PROJECT_NAME}_ENABLE_CXX11)
-  IF (${PACKAGE_NAME}_ENABLE_CUDA)
-    SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT OFF)
-  ELSE ()
-    SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT ON)
-  ENDIF ()
-ELSE ()
-  SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT OFF)
-ENDIF ()
-
-TRIBITS_ADD_OPTION_AND_DEFINE(
-  Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA
-  KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
-  "Whether Kokkos allows use of lambdas at the outer level of parallel dispatch (that is, as the argument to an outer parallel_for, parallel_reduce, or parallel_scan).  This requires C++11.  It also does not currently work with public releases of CUDA.  As a result, even if C++11 is enabled, this will be OFF by default if CUDA is enabled.  If this option is ON, the macro KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA will be defined.  For compatibility with Kokkos' Makefile build system, it is also possible to define that macro on the command line."
-  ${Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT}
-  )
-
 TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
diff --git a/lib/kokkos/core/src/Kokkos_Array.hpp b/lib/kokkos/core/src/Kokkos_Array.hpp
index abb263b7ccd7d6f82f469d06fadbc2326fe21438..43e6386b54fce21c1053103aa82e540e1b69e6ce 100644
--- a/lib/kokkos/core/src/Kokkos_Array.hpp
+++ b/lib/kokkos/core/src/Kokkos_Array.hpp
@@ -152,10 +152,10 @@ public:
   KOKKOS_INLINE_FUNCTION pointer       data()       { return pointer(0) ; }
   KOKKOS_INLINE_FUNCTION const_pointer data() const { return const_pointer(0); }
 
-  ~Array() = default ;
-  Array() = default ;
-  Array( const Array & ) = default ;
-  Array & operator = ( const Array & ) = default ;
+  KOKKOS_FUNCTION_DEFAULTED ~Array() = default ;
+  KOKKOS_FUNCTION_DEFAULTED Array() = default ;
+  KOKKOS_FUNCTION_DEFAULTED Array( const Array & ) = default ;
+  KOKKOS_FUNCTION_DEFAULTED Array & operator = ( const Array & ) = default ;
 
   // Some supported compilers are not sufficiently C++11 compliant
   // for default move constructor and move assignment operator.
@@ -209,7 +209,7 @@ public:
   KOKKOS_INLINE_FUNCTION pointer       data()       { return m_elem ; }
   KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; }
 
-  ~Array() = default ;
+  KOKKOS_FUNCTION_DEFAULTED ~Array() = default ;
   Array() = delete ;
   Array( const Array & rhs ) = delete ;
 
@@ -278,7 +278,7 @@ public:
   KOKKOS_INLINE_FUNCTION pointer       data()       { return m_elem ; }
   KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; }
 
-  ~Array() = default ;
+  KOKKOS_FUNCTION_DEFAULTED ~Array() = default ;
   Array()  = delete ;
   Array( const Array & ) = delete ;
 
diff --git a/lib/kokkos/core/src/Kokkos_Atomic.hpp b/lib/kokkos/core/src/Kokkos_Atomic.hpp
index 3c8673c66a4783cf6dabcb6096568cc8cf8f0d7d..54a97a16c1338187e18b54d176d9241f8ba8253c 100644
--- a/lib/kokkos/core/src/Kokkos_Atomic.hpp
+++ b/lib/kokkos/core/src/Kokkos_Atomic.hpp
@@ -80,6 +80,11 @@
 // Compiling NVIDIA device code, must use Cuda atomics:
 
 #define KOKKOS_ENABLE_CUDA_ATOMICS
+
+#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU)
+
+#define KOKKOS_ENABLE_ROCM_ATOMICS
+
 #endif
 
 #if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \
@@ -154,6 +159,19 @@ const char * atomic_query_version()
 
 } // namespace Kokkos
 
+#if defined( KOKKOS_ENABLE_ROCM )
+#include <ROCm/Kokkos_ROCm_Atomic.hpp>
+namespace Kokkos {
+namespace Impl {
+extern KOKKOS_INLINE_FUNCTION
+bool lock_address_rocm_space(void* ptr);
+
+extern KOKKOS_INLINE_FUNCTION
+void unlock_address_rocm_space(void* ptr);
+}
+}
+#endif
+
 #ifdef _WIN32
 #include "impl/Kokkos_Atomic_Windows.hpp"
 #else
diff --git a/lib/kokkos/core/src/Kokkos_Complex.hpp b/lib/kokkos/core/src/Kokkos_Complex.hpp
index 1fe964a6d2cf5ae5fa69478c82e83429a762ddf4..26b47a8b749e7340692e5d9a6a13273cb0e0f8f4 100644
--- a/lib/kokkos/core/src/Kokkos_Complex.hpp
+++ b/lib/kokkos/core/src/Kokkos_Complex.hpp
@@ -107,6 +107,11 @@ public:
     re_ (val), im_ (0.0)
   {}
 
+  // BUG HCC WORKAROUND
+  KOKKOS_INLINE_FUNCTION complex( const RealType& re, const RealType& im):
+    re_ (re), im_ (im)
+  {}
+ 
   //! Constructor that takes the real and imaginary parts.
   template<class RealType1, class RealType2>
   KOKKOS_INLINE_FUNCTION complex (const RealType1& re, const RealType2& im) :
@@ -227,6 +232,16 @@ public:
     return re_;
   }
 
+  //! Set the imaginary part of this complex number.
+  KOKKOS_INLINE_FUNCTION void imag (RealType v) {
+    im_ = v;
+  }
+
+  //! Set the real part of this complex number.
+  KOKKOS_INLINE_FUNCTION void real (RealType v) {
+    re_ = v;
+  }
+
   KOKKOS_INLINE_FUNCTION
   complex<RealType>& operator += (const complex<RealType>& src) {
     re_ += src.re_;
@@ -299,7 +314,7 @@ public:
     // Scale (by the "1-norm" of y) to avoid unwarranted overflow.
     // If the real part is +/-Inf and the imaginary part is -/+Inf,
     // this won't change the result.
-    const RealType s = ::fabs (y.real ()) + ::fabs (y.imag ());
+    const RealType s = std::fabs (y.real ()) + std::fabs (y.imag ());
 
     // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0.
     // In that case, the relation x/y == (x/s) / (y/s) doesn't hold,
@@ -537,7 +552,7 @@ operator / (const complex<RealType>& x, const complex<RealType>& y) {
   // Scale (by the "1-norm" of y) to avoid unwarranted overflow.
   // If the real part is +/-Inf and the imaginary part is -/+Inf,
   // this won't change the result.
-  const RealType s = ::fabs (real (y)) + ::fabs (imag (y));
+  const RealType s = std::fabs (real (y)) + std::fabs (imag (y));
 
   // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0.
   // In that case, the relation x/y == (x/s) / (y/s) doesn't hold,
diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp
index ddb11d28944d2145dcf46c8bd1511d03732e0d3f..3748c35eb0a52d5c1011f4618c9440cbba3bcf99 100644
--- a/lib/kokkos/core/src/Kokkos_Core.hpp
+++ b/lib/kokkos/core/src/Kokkos_Core.hpp
@@ -74,6 +74,10 @@
 #include <Kokkos_Cuda.hpp>
 #endif
 
+#if defined( KOKKOS_ENABLE_ROCM )
+#include <Kokkos_ROCm.hpp>
+#endif
+
 #include <Kokkos_Pair.hpp>
 #include <Kokkos_MemoryPool.hpp>
 #include <Kokkos_Array.hpp>
diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
index 8c080f7a8fbd4cc35c39fd0ed601e4d456346001..29001e19eddb4fb48bfad15043684b4884c8a29e 100644
--- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
+++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
@@ -122,6 +122,13 @@ class CudaHostPinnedSpace;  ///< Memory space on Host accessible to Cuda GPU
 class Cuda;                 ///< Execution space for Cuda GPU
 #endif
 
+#if defined( KOKKOS_ENABLE_ROCM )
+namespace Experimental {
+class ROCmSpace ;            ///< Memory space on ROCm GPU
+class ROCm ;                 ///< Execution space for ROCm GPU
+}
+#endif
+
 template<class ExecutionSpace, class MemorySpace>
 struct Device;
 
@@ -140,6 +147,8 @@ namespace Kokkos {
   typedef Cuda DefaultExecutionSpace;
 #elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET )
   typedef Experimental::OpenMPTarget DefaultExecutionSpace ;
+#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM )
+  typedef Experimental::ROCm DefaultExecutionSpace ;
 #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
   typedef OpenMP DefaultExecutionSpace;
 #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
@@ -185,6 +194,8 @@ namespace Impl {
 
 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined( KOKKOS_ENABLE_CUDA )
 typedef Kokkos::CudaSpace  ActiveExecutionMemorySpace;
+#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU )
+typedef Kokkos::HostSpace  ActiveExecutionMemorySpace ;
 #elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
 typedef Kokkos::HostSpace  ActiveExecutionMemorySpace;
 #else
diff --git a/lib/kokkos/core/src/Kokkos_Crs.hpp b/lib/kokkos/core/src/Kokkos_Crs.hpp
index 93b3fa5ca9e479f1a5666b4776257e536e9e73d8..f089c16ad2ca3c71ad7cdf2042e19bee93a7e0ed 100644
--- a/lib/kokkos/core/src/Kokkos_Crs.hpp
+++ b/lib/kokkos/core/src/Kokkos_Crs.hpp
@@ -98,18 +98,18 @@ public:
   typedef View<size_type* , array_layout, device_type> row_map_type;
   typedef View<DataType*  , array_layout, device_type> entries_type;
 
-  entries_type entries;
   row_map_type row_map;
+  entries_type entries;
 
   //! Construct an empty view.
-  Crs () : entries(), row_map() {}
+  Crs() : row_map(), entries() {}
 
   //! Copy constructor (shallow copy).
-  Crs (const Crs& rhs) : entries (rhs.entries), row_map (rhs.row_map)
+  Crs(const Crs& rhs) : row_map(rhs.row_map), entries(rhs.entries)
   {}
 
   template<class EntriesType, class RowMapType>
-  Crs (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
+  Crs(const RowMapType& row_map_, const EntriesType& entries_) : row_map(row_map_), entries(entries_)
   {}
 
   /** \brief  Assign to a view of the rhs array.
@@ -117,8 +117,8 @@ public:
    *          then allocated memory is deallocated.
    */
   Crs& operator= (const Crs& rhs) {
-    entries = rhs.entries;
     row_map = rhs.row_map;
+    entries = rhs.entries;
     return *this;
   }
 
@@ -151,7 +151,7 @@ void get_crs_transpose_counts(
 
 template< class OutCounts,
           class InCrs>
-void get_crs_row_map_from_counts(
+typename OutCounts::value_type get_crs_row_map_from_counts(
     OutCounts& out,
     InCrs const& in,
     std::string const& name = "row_map");
@@ -204,18 +204,20 @@ class CrsRowMapFromCounts {
   using execution_space = typename InCounts::execution_space;
   using value_type = typename OutRowMap::value_type;
   using index_type = typename InCounts::size_type;
+  using last_value_type = Kokkos::View<value_type, execution_space>;
  private:
-  InCounts in;
-  OutRowMap out;
+  InCounts m_in;
+  OutRowMap m_out;
+  last_value_type m_last_value;
  public:
   KOKKOS_INLINE_FUNCTION
   void operator()(index_type i, value_type& update, bool final_pass) const {
-    update += in(i);
-    if (final_pass) {
-      out(i + 1) = update;
-      if (i == 0) {
-        out(0) = 0;
-      }
+    if (i < m_in.size()) {
+      update += m_in(i);
+      if (final_pass) m_out(i + 1) = update;
+    } else if (final_pass) {
+      m_out(0) = 0;
+      m_last_value() = update;
     }
   }
   KOKKOS_INLINE_FUNCTION
@@ -226,12 +228,16 @@ class CrsRowMapFromCounts {
   }
   using self_type = CrsRowMapFromCounts<InCounts, OutRowMap>;
   CrsRowMapFromCounts(InCounts const& arg_in, OutRowMap const& arg_out):
-    in(arg_in),out(arg_out) {
+    m_in(arg_in), m_out(arg_out), m_last_value("last_value") {
+  }
+  value_type execute() {
     using policy_type = RangePolicy<index_type, execution_space>;
     using closure_type = Kokkos::Impl::ParallelScan<self_type, policy_type>;
-    closure_type closure(*this, policy_type(0, in.size()));
+    closure_type closure(*this, policy_type(0, m_in.size() + 1));
     closure.execute();
-    execution_space::fence();
+    auto last_value = Kokkos::create_mirror_view(m_last_value);
+    Kokkos::deep_copy(last_value, m_last_value);
+    return last_value();
   }
 };
 
@@ -297,13 +303,14 @@ void get_crs_transpose_counts(
 
 template< class OutRowMap,
           class InCounts>
-void get_crs_row_map_from_counts(
+typename OutRowMap::value_type get_crs_row_map_from_counts(
     OutRowMap& out,
     InCounts const& in,
     std::string const& name) {
   out = OutRowMap(ViewAllocateWithoutInitializing(name), in.size() + 1);
   Kokkos::Impl::Experimental::
     CrsRowMapFromCounts<InCounts, OutRowMap> functor(in, out);
+  return functor.execute();
 }
 
 template< class DataType,
@@ -328,6 +335,65 @@ void transpose_crs(
     FillCrsTransposeEntries<crs_type, crs_type> entries_functor(in, out);
 }
 
+template< class CrsType,
+          class Functor>
+struct CountAndFill { // two-pass Crs builder: count entries per row, then fill them
+  using data_type = typename CrsType::entries_type::value_type; // element type of the entries view
+  using size_type = typename CrsType::size_type;
+  using row_map_type = typename CrsType::row_map_type;
+  using entries_type = typename CrsType::entries_type;
+  using counts_type = row_map_type;
+  CrsType m_crs;
+  Functor m_functor;
+  counts_type m_counts;
+  struct Count {}; // tag for pass 1: m_functor(i, nullptr) yields the entry count of row i
+  KOKKOS_INLINE_FUNCTION void operator()(Count, size_type i) const {
+    m_counts(i) = m_functor(i, nullptr);
+  }
+  struct Fill {}; // tag for pass 2: m_functor(i, fill) is handed the start of row i in entries
+  KOKKOS_INLINE_FUNCTION void operator()(Fill, size_type i) const {
+    auto j = m_crs.row_map(i);
+    data_type* fill = &(m_crs.entries(j));
+    m_functor(i, fill);
+  }
+  using self_type = CountAndFill<CrsType, Functor>;
+  CountAndFill(CrsType& crs, size_type nrows, Functor const& f):
+    m_crs(crs),
+    m_functor(f)
+  {
+    using execution_space = typename CrsType::execution_space;
+    m_counts = counts_type("counts", nrows);
+    {
+    using count_policy_type = RangePolicy<size_type, execution_space, Count>;
+    using count_closure_type =
+      Kokkos::Impl::ParallelFor<self_type, count_policy_type>;
+    const count_closure_type closure(*this, count_policy_type(0, nrows));
+    closure.execute();
+    }
+    auto nentries = Kokkos::Experimental::
+      get_crs_row_map_from_counts(m_crs.row_map, m_counts); // scan counts into row_map; returns the total entry count
+    m_counts = counts_type(); // drop this object's handle on the counts view
+    m_crs.entries = entries_type("entries", nentries);
+    {
+    using fill_policy_type = RangePolicy<size_type, execution_space, Fill>;
+    using fill_closure_type =
+      Kokkos::Impl::ParallelFor<self_type, fill_policy_type>;
+    const fill_closure_type closure(*this, fill_policy_type(0, nrows));
+    closure.execute();
+    }
+    crs = m_crs; // publish the assembled row_map/entries to the caller's Crs
+  }
+};
+
+template< class CrsType, class Functor >
+void count_and_fill_crs(
+    CrsType& crs, typename CrsType::size_type nrows, Functor const& f) {
+  // The CountAndFill constructor performs the whole build and assigns the
+  // result back into crs; the temporary object itself is then discarded.
+  using builder_type = Kokkos::Experimental::CountAndFill<CrsType, Functor>;
+  builder_type(crs, nrows, f);
+}
+
 }} // namespace Kokkos::Experimental
 
 #endif /* #define KOKKOS_CRS_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp
index 250ef6630ad6ec294782121e2eab866e89b434d9..7137eaae4b7d8336e269b0f946c10a7cf4e62515 100644
--- a/lib/kokkos/core/src/Kokkos_Macros.hpp
+++ b/lib/kokkos/core/src/Kokkos_Macros.hpp
@@ -96,6 +96,14 @@
 
 //----------------------------------------------------------------------------
 
+#if defined(KOKKOS_ENABLE_SERIAL) || defined(KOKKOS_ENABLE_THREADS) || \
+    defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_QTHREADS) || \
+    defined(KOKKOS_ENABLE_ROCM) || defined(KOKKOS_ENABLE_OPENMPTARGET)
+  #define KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND
+#endif
+
+#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
+
 #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
   // Compiling with a CUDA compiler.
   //
@@ -133,6 +141,9 @@
 
     #if ( CUDA_VERSION < 8000 ) && defined( __NVCC__ )
       #define KOKKOS_LAMBDA [=]__device__
+      #if defined( KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND )
+        #undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
+      #endif
     #else
       #define KOKKOS_LAMBDA [=]__host__ __device__
 
@@ -141,16 +152,13 @@
       #endif
     #endif
 
-    #define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
-  #endif
-#endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
-
-#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
-   // Cuda version 8.0 still needs the functor wrapper
-   #if /* ( CUDA_VERSION < 8000 ) && */  defined( __NVCC__ )
+    #if defined( __NVCC__ )
       #define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
-   #endif
-#endif
+    #endif
+  #else // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
+    #undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
+  #endif // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
+#endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
 
 //----------------------------------------------------------------------------
 // Language info: C++, CUDA, OPENMP
@@ -161,8 +169,20 @@
   #define KOKKOS_FORCEINLINE_FUNCTION  __device__  __host__  __forceinline__
   #define KOKKOS_INLINE_FUNCTION       __device__  __host__  inline
   #define KOKKOS_FUNCTION              __device__  __host__
+  #ifdef KOKKOS_COMPILER_CLANG
+  #define KOKKOS_FUNCTION_DEFAULTED KOKKOS_FUNCTION
+  #endif
 #endif // #if defined( __CUDA_ARCH__ )
 
+#if defined( KOKKOS_ENABLE_ROCM ) && defined( __HCC__ )
+
+  #define KOKKOS_FORCEINLINE_FUNCTION  __attribute__((amp,cpu)) inline
+  #define KOKKOS_INLINE_FUNCTION       __attribute__((amp,cpu)) inline
+  #define KOKKOS_FUNCTION              __attribute__((amp,cpu))
+  #define KOKKOS_LAMBDA                [=] __attribute__((amp,cpu))
+  #define KOKKOS_FUNCTION_DEFAULTED    KOKKOS_FUNCTION
+#endif
+
 #if defined( _OPENMP )
   //  Compiling with OpenMP.
   //  The value of _OPENMP is an integer value YYYYMM
@@ -179,15 +199,6 @@
   // Host code is compiled again with another compiler.
   // Device code is compile to 'ptx'.
   #define KOKKOS_COMPILER_NVCC __NVCC__
-#else
-  #if !defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
-    #if !defined( KOKKOS_ENABLE_CUDA ) // Compiling with clang for Cuda does not work with LAMBDAs either
-      // CUDA (including version 6.5) does not support giving lambdas as
-      // arguments to global functions. Thus its not currently possible
-      // to dispatch lambdas from the host.
-      #define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
-    #endif
-  #endif
 #endif // #if defined( __NVCC__ )
 
 #if !defined( KOKKOS_LAMBDA )
@@ -321,6 +332,10 @@
   //#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
   //#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
   //#define KOKKOS_ENABLE_PRAGMA_SIMD 1
+
+  #if ! defined( KOKKOS_ENABLE_ASM )
+    #define KOKKOS_ENABLE_ASM 1
+  #endif
 #endif
 
 //----------------------------------------------------------------------------
@@ -397,6 +412,10 @@
   #define KOKKOS_FUNCTION /**/
 #endif
 
+#if !defined( KOKKOS_FUNCTION_DEFAULTED )
+  #define KOKKOS_FUNCTION_DEFAULTED /**/
+#endif
+
 //----------------------------------------------------------------------------
 // Define empty macro for restrict if necessary:
 
@@ -424,6 +443,7 @@
 // There is zero or one default execution space specified.
 
 #if 1 < ( ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
+          ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM ) ? 1 : 0 ) + \
           ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET ) ? 1 : 0 ) + \
           ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
           ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
@@ -435,6 +455,7 @@
 // If default is not specified then chose from enabled execution spaces.
 // Priority: CUDA, OPENMP, THREADS, QTHREADS, SERIAL
 #if   defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
+#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM )
 #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET )
 #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
 #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
@@ -442,6 +463,8 @@
 #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
 #elif defined( KOKKOS_ENABLE_CUDA )
   #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
+#elif defined( KOKKOS_ENABLE_ROCM )
+  #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM
 #elif defined( KOKKOS_ENABLE_OPENMPTARGET )
   #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET
 #elif defined( KOKKOS_ENABLE_OPENMP )
@@ -459,6 +482,8 @@
 
 #if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA )
   #define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
+#elif   defined( __HCC__ ) && defined( __HCC_ACCELERATOR__ ) && defined( KOKKOS_ENABLE_ROCM )
+  #define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU
 #else
   #define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
 #endif
diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp
index 1da936067d59e269ce43fb6a9e65cfe16220cc2e..4ba5812f9e93e19060a8fa8cb20e94b1fb6864f7 100644
--- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp
+++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp
@@ -233,12 +233,24 @@ public:
 
   //--------------------------------------------------------------------------
 
-  MemoryPool() = default ;
   MemoryPool( MemoryPool && ) = default ;
   MemoryPool( const MemoryPool & ) = default ;
   MemoryPool & operator = ( MemoryPool && ) = default ;
   MemoryPool & operator = ( const MemoryPool & ) = default ;
 
+  MemoryPool() // default ctor: default-constructs m_tracker and sets every other listed member to 0
+    : m_tracker()
+    , m_sb_state_array(0)
+    , m_sb_state_size(0)
+    , m_sb_size_lg2(0)
+    , m_max_block_size_lg2(0)
+    , m_min_block_size_lg2(0)
+    , m_sb_count(0)
+    , m_hint_offset(0)
+    , m_data_offset(0)
+    , m_unused_padding(0)
+    {}
+
   /**\brief  Allocate a memory pool from 'memspace'.
    *
    *  The memory pool will have at least 'min_total_alloc_size' bytes
diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
index 9df6d4ba097d62191214e27578c656bf21ea70a5..c392fc5b9ab76f19769b5d68b8b65cbd8340db54 100644
--- a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
+++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
@@ -1016,7 +1016,7 @@ parallel_reduce( std::string const  & arg_label
 
   //------------------------------
 
-  #if (KOKKOS_ENABLE_PROFILING)
+  #if defined(KOKKOS_ENABLE_PROFILING)
   uint64_t kpID = 0;
   if(Kokkos::Profiling::profileLibraryLoaded()) {
     Kokkos::Profiling::beginParallelReduce(arg_label, 0, &kpID);
@@ -1042,7 +1042,7 @@ parallel_reduce( std::string const  & arg_label
 
   //------------------------------
 
-  #if (KOKKOS_ENABLE_PROFILING)
+  #if defined(KOKKOS_ENABLE_PROFILING)
   if(Kokkos::Profiling::profileLibraryLoaded()) {
     Kokkos::Profiling::endParallelReduce(kpID);
   }
diff --git a/lib/kokkos/core/src/Kokkos_ROCm.hpp b/lib/kokkos/core/src/Kokkos_ROCm.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b13b0b01dea588e3ddf2fd57a7be5b24005d4498
--- /dev/null
+++ b/lib/kokkos/core/src/Kokkos_ROCm.hpp
@@ -0,0 +1,220 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_ROCM_HPP
+#define KOKKOS_ROCM_HPP
+
+#include <Kokkos_Core_fwd.hpp>
+
+#if defined( KOKKOS_ENABLE_ROCM )
+#include <ROCm/hc_math_std.hpp>
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#include <cstddef>
+#include <iosfwd>
+#include <Kokkos_HostSpace.hpp>
+#include <Kokkos_ROCmSpace.hpp>
+#include <ROCm/Kokkos_ROCm_Exec.hpp>
+#include <Kokkos_ScratchSpace.hpp>
+#include <Kokkos_Parallel.hpp>
+#include <Kokkos_Layout.hpp>
+#include <impl/Kokkos_Tags.hpp>
+
+/*--------------------------------------------------------------------------*/
+
+#include <hc.hpp>
+#include <hc_am.hpp>
+#include <amp_math.h>
+
+#if defined( __HCC_ACCELERATOR__ )
+
+using namespace ::Concurrency::precise_math ;
+
+#endif
+
+/*--------------------------------------------------------------------------*/
+
+namespace Kokkos {
+namespace Impl {
+class ROCmExec ;
+} // namespace Impl
+} // namespace Kokkos
+
+/*--------------------------------------------------------------------------*/
+
+namespace Kokkos {
+namespace Experimental {
+/// \class ROCm
+/// \brief Kokkos execution space for ROCm GPUs.
+class ROCm {
+public:
+  //------------------------------------
+  //! \name Type declarations that all Kokkos devices must provide.
+  //@{
+
+  //! Tag this class as a kokkos execution space
+  typedef ROCm                  execution_space ;
+  typedef ROCmSpace             memory_space ;
+  typedef Kokkos::Device<execution_space,memory_space> device_type;
+
+  typedef LayoutLeft            array_layout ;
+  typedef HostSpace::size_type  size_type ;
+
+  typedef ScratchMemorySpace< ROCm > scratch_memory_space ;
+
+  ~ROCm() {}
+  ROCm();
+//  explicit ROCm( const int instance_id );
+
+  ROCm( ROCm && ) = default ;
+  ROCm( const ROCm & ) = default ;
+  ROCm & operator = ( ROCm && ) = default ;
+  ROCm & operator = ( const ROCm & ) = default ;
+
+
+  //@}
+  //------------------------------------
+  //! \name Functions that all Kokkos devices must implement.
+  //@{
+
+  KOKKOS_INLINE_FUNCTION static int in_parallel() {
+#if defined( __HCC_ACCELERATOR__ )
+    return true;
+#else
+    return false;
+#endif
+  }
+
+  /** \brief  Set the device in a "sleep" state. */
+  static bool sleep() ;
+
+  /** \brief Wake the device from the 'sleep' state. */
+  static bool wake() ;
+
+  /** \brief Wait until all dispatched functors complete. */
+  static void fence() ;
+
+  /// \brief Print configuration information to the given output stream.
+  static void print_configuration( std::ostream & , const bool detail = false );
+
+  /// \brief Free any resources being consumed by the device.
+  static void finalize() ;
+
+  /** \brief  Initialize the device.
+   *
+   */
+  struct SelectDevice {
+    int rocm_device_id ;
+    SelectDevice() : rocm_device_id(1) {}
+    explicit SelectDevice( int id ) : rocm_device_id( id+1 ) {} // stored value is id + 1
+  };
+
+  int          rocm_device() const { return m_device ; }
+  bool         isAPU();
+  bool         isAPU(int device);
+
+  static void initialize( const SelectDevice = SelectDevice());
+
+  static int is_initialized();
+
+//  static size_type device_arch();
+
+//  static size_type detect_device_count();
+
+
+  static int concurrency() ;
+  static const char* name();
+private:
+  int          m_device ;
+
+};
+}
+} // namespace Kokkos
+
+namespace Kokkos {
+namespace Impl {
+
+template<>
+struct MemorySpaceAccess
+  < Kokkos::Experimental::ROCmSpace
+  , Kokkos::Experimental::ROCm::scratch_memory_space
+  >
+{
+  enum { assignable = false };
+  enum { accessible = true };
+  enum { deepcopy   = false };
+};
+
+template<>
+struct VerifyExecutionCanAccessMemorySpace
+  < Kokkos::Experimental::ROCm::memory_space
+  , Kokkos::Experimental::ROCm::scratch_memory_space
+  >
+{
+  enum { value = true };
+  KOKKOS_INLINE_FUNCTION static void verify( void ) { }
+  KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
+};
+
+template<>
+struct VerifyExecutionCanAccessMemorySpace
+  < Kokkos::HostSpace
+  , Kokkos::Experimental::ROCm::scratch_memory_space
+  >
+{
+  enum { value = false };
+  inline static void verify( void ) { Experimental::ROCmSpace::access_error(); }
+  inline static void verify( const void * p ) { Experimental::ROCmSpace::access_error(p); }
+};
+} // namespace Impl
+} // namespace Kokkos
+
+
+#include <ROCm/Kokkos_ROCm_Parallel.hpp>
+#include <ROCm/Kokkos_ROCm_Task.hpp>
+
+#endif
+#endif
+
+
diff --git a/lib/kokkos/core/src/Kokkos_ROCmSpace.hpp b/lib/kokkos/core/src/Kokkos_ROCmSpace.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..dce6a3cb3f838c56aba2de6f73a775b0586ea353
--- /dev/null
+++ b/lib/kokkos/core/src/Kokkos_ROCmSpace.hpp
@@ -0,0 +1,622 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_ROCMSPACE_HPP
+#define KOKKOS_ROCMSPACE_HPP
+
+#include <Kokkos_Core_fwd.hpp>
+
+#if defined( KOKKOS_ENABLE_ROCM )
+
+#include <iosfwd>
+#include <typeinfo>
+#include <string>
+
+#include <Kokkos_HostSpace.hpp>
+
+
+/*--------------------------------------------------------------------------*/
+
+namespace Kokkos {
+namespace Experimental {
+/** \brief  ROCm on-device memory management */
+
+class ROCmSpace {
+public:
+
+  //! Tag this class as a kokkos memory space
+  typedef ROCmSpace             memory_space ;
+  typedef Kokkos::Experimental::ROCm          execution_space ;
+  typedef Kokkos::Device<execution_space,memory_space> device_type;
+
+  typedef unsigned int          size_type ;
+
+  /*--------------------------------*/
+
+  ROCmSpace();
+  ROCmSpace( ROCmSpace && rhs ) = default ;
+  ROCmSpace( const ROCmSpace & rhs ) = default ;
+  ROCmSpace & operator = ( ROCmSpace && rhs ) = default ;
+  ROCmSpace & operator = ( const ROCmSpace & rhs ) = default ;
+  ~ROCmSpace() = default ;
+
+  /**\brief  Allocate untracked memory in the rocm space */
+  void * allocate( const size_t arg_alloc_size ) const ;
+
+  /**\brief  Deallocate untracked memory in the rocm space */
+  void deallocate( void * const arg_alloc_ptr
+                 , const size_t arg_alloc_size ) const ;
+
+  /**\brief Return Name of the MemorySpace */
+  static constexpr const char* name() { return m_name; };
+
+  /*--------------------------------*/
+  /** \brief  Error reporting for HostSpace attempt to access ROCmSpace */
+  static void access_error();
+  static void access_error( const void * const );
+
+private:
+
+  int  m_device ; ///< Which ROCm device
+
+  static constexpr const char* m_name = "ROCm";
+  friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > ;
+};
+
+} // namespace Experimental
+
+namespace Impl {
+
+void * rocm_device_allocate(int);
+void * rocm_hostpinned_allocate(int);
+void rocm_device_free(void * );
+
+/// \brief Initialize lock array for arbitrary size atomics.
+///
+/// Arbitrary atomics are implemented using a hash table of locks
+/// where the hash value is derived from the address of the
+/// object for which an atomic operation is performed.
+/// This function initializes the locks to zero (unset).
+void init_lock_arrays_rocm_space();
+
+/// \brief Retrieve the pointer to the lock array for arbitrary size atomics.
+///
+/// Arbitrary atomics are implemented using a hash table of locks
+/// where the hash value is derived from the address of the
+/// object for which an atomic operation is performed.
+/// This function retrieves the lock array pointer.
+/// If the array is not yet allocated it will do so.
+int* atomic_lock_array_rocm_space_ptr(bool deallocate = false);
+
+/// \brief Retrieve the pointer to the scratch array for team and thread private global memory.
+///
+/// Team and Thread private scratch allocations in
+/// global memory are acquired via locks.
+/// This function retrieves the lock array pointer.
+/// If the array is not yet allocated it will do so.
+int* scratch_lock_array_rocm_space_ptr(bool deallocate = false);
+
+/// \brief Retrieve the pointer to the scratch array for unique identifiers.
+///
+/// Unique identifiers in the range 0-ROCm::concurrency
+/// are provided via locks.
+/// This function retrieves the lock array pointer.
+/// If the array is not yet allocated it will do so.
+int* threadid_lock_array_rocm_space_ptr(bool deallocate = false);
+}
+} // namespace Kokkos
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+
+
+namespace Kokkos {
+namespace Experimental {
+/** \brief  Host memory that is accessible to ROCm execution space
+ *          through ROCm's host-pinned memory allocation.
+ */
+class ROCmHostPinnedSpace {
+public:
+
+  //! Tag this class as a kokkos memory space
+  /** \brief  Memory is in HostSpace so use the HostSpace::execution_space */
+  typedef HostSpace::execution_space  execution_space ;
+  typedef ROCmHostPinnedSpace         memory_space ;
+  typedef Kokkos::Device<execution_space,memory_space> device_type;
+  typedef unsigned int                size_type ;
+
+  /*--------------------------------*/
+
+  ROCmHostPinnedSpace();
+  ROCmHostPinnedSpace( ROCmHostPinnedSpace && rhs ) = default ;
+  ROCmHostPinnedSpace( const ROCmHostPinnedSpace & rhs ) = default ;
+  ROCmHostPinnedSpace & operator = ( ROCmHostPinnedSpace && rhs ) = default ;
+  ROCmHostPinnedSpace & operator = ( const ROCmHostPinnedSpace & rhs ) = default ;
+  ~ROCmHostPinnedSpace() = default ;
+
+  /**\brief  Allocate untracked memory in the space */
+  void * allocate( const size_t arg_alloc_size ) const ;
+
+  /**\brief  Deallocate untracked memory in the space */
+  void deallocate( void * const arg_alloc_ptr
+                 , const size_t arg_alloc_size ) const ;
+
+  /**\brief Return Name of the MemorySpace */
+  static constexpr const char* name() { return m_name; };
+
+private:
+
+  static constexpr const char* m_name = "ROCmHostPinned";
+
+  /*--------------------------------*/
+};
+} // namespace Experimental
+} // namespace Kokkos
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+
+namespace Kokkos {
+namespace Impl {
+
+static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace >::assignable , "" );
+
+//----------------------------------------
+
+template<>
+struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::Experimental::ROCmSpace > {
+  enum { assignable = false };
+  enum { accessible = false };
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace > {
+  // HostSpace::execution_space == ROCmHostPinnedSpace::execution_space
+  enum { assignable = true };
+  enum { accessible = true };
+  enum { deepcopy   = true };
+};
+
+//----------------------------------------
+
+template<>
+struct MemorySpaceAccess< Kokkos::Experimental::ROCmSpace , Kokkos::HostSpace > {
+  enum { assignable = false };
+  enum { accessible = false };
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmHostPinnedSpace > {
+  // ROCmSpace::execution_space != ROCmHostPinnedSpace::execution_space
+  enum { assignable = false };
+  enum { accessible = true }; // ROCmSpace::execution_space
+  enum { deepcopy   = true };
+};
+
+
+//----------------------------------------
+// ROCmHostPinnedSpace::execution_space == HostSpace::execution_space
+// ROCmHostPinnedSpace accessible to both ROCm and Host
+
+template<>
+struct MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::HostSpace > {
+  enum { assignable = false }; // Cannot access from ROCm
+  enum { accessible = true };  // ROCmHostPinnedSpace::execution_space
+  enum { deepcopy   = true };
+};
+
+template<>
+struct MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmSpace > {
+  enum { assignable = false }; // Cannot access from Host
+  enum { accessible = false };
+  enum { deepcopy   = true };
+};
+
+};
+//----------------------------------------
+
+} // namespace Kokkos::Impl
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+
+namespace Kokkos {
+namespace Impl {
+
+hc::completion_future DeepCopyAsyncROCm( void * dst , const void * src , size_t n);
+
+template<> struct DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm>
+{
+  DeepCopy( void * dst , const void * src , size_t );
+  DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
+};
+
+template<> struct DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , Kokkos::Experimental::ROCm >
+{
+  DeepCopy( void * dst , const void * src , size_t );
+  DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
+};
+
+template<> struct DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >
+{
+  DeepCopy( void * dst , const void * src , size_t );
+  DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
+};
+
+// DeepCopy for the (ROCmSpace, ROCmSpace) pair when the caller supplies some other execution space.
+template<class ExecutionSpace>
+struct DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace , ExecutionSpace >
+{
+  // No execution-space instance: forward to the ROCm specialization.
+  inline DeepCopy( void * dst , const void * src , size_t n )
+  { (void) DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
+
+  // With an instance: fence it, start the asynchronous copy, and block on its future.
+  inline DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
+  {
+    exec.fence();
+    DeepCopyAsyncROCm( dst , src , n ).wait();
+  }
+};
+
+// DeepCopy for the (ROCmSpace, HostSpace) pair with a caller-chosen execution space; both paths use the ROCm specialization.
+template<class ExecutionSpace> struct DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , ExecutionSpace >
+{
+  inline DeepCopy( void * dst , const void * src , size_t n )
+  { (void) DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , Kokkos::Experimental::ROCm>( dst , src , n ); }
+
+  // Fence the supplied execution space first, then perform the same copy.
+  inline DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
+  {
+    exec.fence();
+    (void) DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , Kokkos::Experimental::ROCm>( dst , src , n );
+  }
+};
+
+// DeepCopy for the (HostSpace, ROCmSpace) pair with a caller-chosen execution space; both paths use the ROCm specialization.
+template<class ExecutionSpace>
+struct DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , ExecutionSpace >
+{
+  inline DeepCopy( void * dst , const void * src , size_t n )
+  { (void) DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
+
+  // Fence the supplied execution space first, then perform the same copy.
+  inline DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
+  {
+    exec.fence();
+    (void) DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >( dst , src , n );
+  }
+};
+
+template<> struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm>
+{  // pinned-host DeepCopy specializations for the ROCm execution space: constructors are declared without bodies
+  DeepCopy( void * dst , const void * src , size_t );  // NOTE(review): the unnamed size_t is presumably a byte count -- confirm
+  DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
+};
+
+template<> struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , HostSpace , Kokkos::Experimental::ROCm >
+{
+  DeepCopy( void * dst , const void * src , size_t );
+  DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
+};
+
+template<> struct DeepCopy< HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm >
+{
+  DeepCopy( void * dst , const void * src , size_t );
+  DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
+};
+
+// DeepCopy for the (ROCmSpace, ROCmHostPinnedSpace) pair when the caller supplies
+// some other execution space.
+template<class ExecutionSpace>
+struct DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmHostPinnedSpace , ExecutionSpace>
+{
+  // No execution-space instance: reuse the (ROCmSpace, HostSpace) ROCm specialization.
+  inline DeepCopy( void * dst , const void * src , size_t n )
+  { (void) DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
+
+  // With an instance: fence it, start the asynchronous copy, and block on its future.
+  inline DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
+  {
+    exec.fence();
+    DeepCopyAsyncROCm( dst , src , n ).wait();
+  }
+};
+
+// DeepCopy for the (ROCmHostPinnedSpace, ROCmSpace) pair when the caller supplies some other execution space.
+template<class ExecutionSpace>
+struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmSpace , ExecutionSpace >
+{
+  // No execution-space instance: reuse the (HostSpace, ROCmSpace) ROCm specialization.
+  inline DeepCopy( void * dst , const void * src , size_t n )
+  { (void) DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
+
+  // With an instance: fence it, start the asynchronous copy, and block on its future.
+  inline DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
+  {
+    exec.fence();
+    DeepCopyAsyncROCm( dst , src , n ).wait();
+  }
+};
+
+
+
+// DeepCopy for the (ROCmHostPinnedSpace, ROCmHostPinnedSpace) pair when the caller
+// supplies some other execution space; both paths use the ROCm specialization.
+template<class ExecutionSpace>
+struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmHostPinnedSpace , ExecutionSpace >
+{
+  // No execution-space instance: forward directly.
+  inline DeepCopy( void * dst , const void * src , size_t n )
+  { (void) DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
+
+  // With an instance: fence it first, then perform the same copy.
+  inline DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
+  {
+    exec.fence();
+    (void) DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm >( dst , src , n );
+  }
+};
+
+// DeepCopy for the (ROCmHostPinnedSpace, HostSpace) pair with a caller-chosen execution space; both paths use the ROCm specialization.
+template<class ExecutionSpace> struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , HostSpace , ExecutionSpace >
+{
+  inline DeepCopy( void * dst , const void * src , size_t n )
+  { (void) DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , HostSpace , Kokkos::Experimental::ROCm>( dst , src , n ); }
+
+  // Fence the supplied execution space first, then perform the same copy.
+  inline DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
+  {
+    exec.fence();
+    (void) DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , HostSpace , Kokkos::Experimental::ROCm>( dst , src , n );
+  }
+};
+
+// DeepCopy for the (HostSpace, ROCmHostPinnedSpace) pair with a caller-chosen execution space; both paths use the ROCm specialization.
+template<class ExecutionSpace>
+struct DeepCopy< HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace , ExecutionSpace >
+{
+  inline DeepCopy( void * dst , const void * src , size_t n )
+  { (void) DeepCopy< HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
+
+  // Fence the supplied execution space first, then perform the same copy.
+  inline DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
+  {
+    exec.fence();
+    (void) DeepCopy< HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm >( dst , src , n );
+  }
+};
+} // namespace Impl
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+/** Running in ROCmSpace, accessing HostSpace: refused (value is false, both verify overloads abort). */
+template<>
+struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::ROCmSpace , Kokkos::HostSpace >
+{
+  enum { value = false };
+  // The pointer argument of the second overload is unused.
+  KOKKOS_INLINE_FUNCTION static void verify()
+  { Kokkos::abort("ROCm code attempted to access HostSpace memory"); }
+  KOKKOS_INLINE_FUNCTION static void verify( const void * )
+  { Kokkos::abort("ROCm code attempted to access HostSpace memory"); }
+};
+
+/** Running in ROCmSpace, accessing ROCmHostPinnedSpace: allowed (value is true, verify overloads do nothing). */
+template<>
+struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmHostPinnedSpace >
+{
+  enum { value = true };
+  KOKKOS_INLINE_FUNCTION static void verify() {}
+  KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
+};
+
+/** Running in ROCmSpace, accessing any space other than ROCmSpace that has no explicit specialization: refused. */
+template< class OtherSpace >
+struct VerifyExecutionCanAccessMemorySpace<
+  typename enable_if< ! is_same<Kokkos::Experimental::ROCmSpace,OtherSpace>::value , Kokkos::Experimental::ROCmSpace >::type ,
+  OtherSpace >
+{
+  enum { value = false };
+  // Both overloads abort with the same diagnostic; the pointer argument is unused.
+  KOKKOS_INLINE_FUNCTION static void verify()
+  { Kokkos::abort("ROCm code attempted to access unknown Space memory"); }
+  KOKKOS_INLINE_FUNCTION static void verify( const void * )
+  { Kokkos::abort("ROCm code attempted to access unknown Space memory"); }
+};
+
+//----------------------------------------------------------------------------
+/** Running in HostSpace, accessing ROCmSpace: refused; verify forwards to ROCmSpace::access_error. */
+template<>
+struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experimental::ROCmSpace >
+{
+  enum { value = false };
+  inline static void verify() { Kokkos::Experimental::ROCmSpace::access_error(); }
+  inline static void verify( const void * ptr ) { Kokkos::Experimental::ROCmSpace::access_error(ptr); }
+};
+
+/** Running in HostSpace, accessing ROCmHostPinnedSpace: allowed (value is true, verify overloads do nothing). */
+template<>
+struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace >
+{
+  enum { value = true };
+  KOKKOS_INLINE_FUNCTION static void verify() {}
+  KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
+};
+} // namespace Impl
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+template<>
+class SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >
+  : public SharedAllocationRecord< void , void >
+{
+private:
+  // Allocation record specialization for ROCmSpace.  Every member function
+  // below is only declared in this class body.
+  typedef SharedAllocationRecord< void , void >  RecordBase ;
+  // Copy construction and copy assignment are disabled.
+  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
+  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
+  // Default value of the constructor's arg_dealloc parameter.
+  static void deallocate( RecordBase * );
+
+  static RecordBase s_root_record ;
+
+  const Kokkos::Experimental::ROCmSpace m_space ;
+
+protected:
+  // Destructor and constructor are protected; the public surface is get_label() plus the static functions.
+  ~SharedAllocationRecord();
+
+  SharedAllocationRecord( const Kokkos::Experimental::ROCmSpace        & arg_space
+                        , const std::string              & arg_label
+                        , const size_t                     arg_alloc_size
+                        , const RecordBase::function_type  arg_dealloc = & deallocate
+                        );
+
+public:
+  // Returns the record's label as a std::string.
+  std::string get_label() const ;
+  // Static factory: takes a space, a label and a size and returns a record pointer.
+  static SharedAllocationRecord * allocate( const Kokkos::Experimental::ROCmSpace &  arg_space
+                                          , const std::string       &  arg_label
+                                          , const size_t               arg_alloc_size );
+
+  /**\brief  Allocate tracked memory in the space */
+  static
+  void * allocate_tracked( const Kokkos::Experimental::ROCmSpace & arg_space
+                         , const std::string & arg_label
+                         , const size_t arg_alloc_size );
+
+  /**\brief  Reallocate tracked memory in the space */
+  static
+  void * reallocate_tracked( void * const arg_alloc_ptr
+                           , const size_t arg_alloc_size );
+
+  /**\brief  Deallocate tracked memory in the space */
+  static
+  void deallocate_tracked( void * const arg_alloc_ptr );
+  // Maps an allocation pointer to its record -- presumably a pointer from allocate_tracked; confirm.
+  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
+  // Writes records to the given stream; 'detail' defaults to false.
+  static void print_records( std::ostream & , const Kokkos::Experimental::ROCmSpace & , bool detail = false );
+};
+
+template<>
+class SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >
+  : public SharedAllocationRecord< void , void >
+{
+private:
+  // Allocation record specialization for ROCmHostPinnedSpace.
+  typedef SharedAllocationRecord< void , void >  RecordBase ;
+  // Copy construction and copy assignment are disabled.
+  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
+  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
+  // Default value of the constructor's arg_dealloc parameter.
+  static void deallocate( RecordBase * );
+
+  static RecordBase s_root_record ;
+
+  const Kokkos::Experimental::ROCmHostPinnedSpace m_space ;
+
+protected:
+  // Destructor and constructors are protected; the default constructor default-initializes base and space.
+  ~SharedAllocationRecord();
+  SharedAllocationRecord() : RecordBase(), m_space() {}
+
+  SharedAllocationRecord( const Kokkos::Experimental::ROCmHostPinnedSpace     & arg_space
+                        , const std::string              & arg_label
+                        , const size_t                     arg_alloc_size
+                        , const RecordBase::function_type  arg_dealloc = & deallocate
+                        );
+
+public:
+  // Returns the record's label as a std::string.
+  std::string get_label() const ;
+  // Static factory: takes a space, a label and a size and returns a record pointer.
+  static SharedAllocationRecord * allocate( const Kokkos::Experimental::ROCmHostPinnedSpace &  arg_space
+                                          , const std::string          &  arg_label
+                                          , const size_t                  arg_alloc_size
+                                          );
+  /**\brief  Allocate tracked memory in the space */
+  static
+  void * allocate_tracked( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_space
+                         , const std::string & arg_label
+                         , const size_t arg_alloc_size );
+
+  /**\brief  Reallocate tracked memory in the space */
+  static
+  void * reallocate_tracked( void * const arg_alloc_ptr
+                           , const size_t arg_alloc_size );
+
+  /**\brief  Deallocate tracked memory in the space */
+  static
+  void deallocate_tracked( void * const arg_alloc_ptr );
+
+  // Maps an allocation pointer to its record -- presumably a pointer from allocate_tracked; confirm.
+  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
+  // Writes records to the given stream; 'detail' defaults to false.
+  static void print_records( std::ostream & , const Kokkos::Experimental::ROCmHostPinnedSpace & , bool detail = false );
+};
+} // namespace Impl
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #if defined( KOKKOS_ENABLE_ROCM ) */
+#endif /* #define KOKKOS_ROCMSPACE_HPP */
+
diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp
index fcfc91a4eeb8bd0e92506a85ec2989d49f81ef54..079f80f5561f7efa5a5d1adf2f1fe6914acb82ff 100644
--- a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp
+++ b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp
@@ -681,6 +681,67 @@ public:
       return f ;
     }
 
+  // Create an aggregate task that waits on the futures func(0) .. func(narg-1)
+  // and return a future for it.  The returned future has a null task when narg
+  // is zero or when the queue allocation fails.  Aborts if a non-empty input
+  // future belongs to a different queue than this scheduler's.
+  template < class F >
+  KOKKOS_FUNCTION
+  Future< execution_space >
+  when_all( int narg , F const func )
+    {
+      using input_type  = decltype( func(0) );
+      using future_type = Future< execution_space > ;
+      using task_base   = Kokkos::Impl::TaskBase< void , void , void > ;
+
+      static_assert( is_future< input_type >::value
+                   , "Functor must return a Kokkos::Future" );
+
+      future_type f ;
+
+      if ( 0 == narg ) return f ;
+
+      size_t const alloc_size = m_queue->when_all_allocation_size( narg );
+
+      f.m_task =
+        reinterpret_cast< task_base * >( m_queue->allocate( alloc_size ) );
+
+      if ( f.m_task ) {
+        // Reference count starts at two:
+        // +1 to match decrement when task completes
+        // +1 for the future
+        new( f.m_task ) task_base();
+
+        f.m_task->m_queue      = m_queue ;
+        f.m_task->m_ref_count  = 2 ;
+        f.m_task->m_alloc_size = alloc_size ;
+        f.m_task->m_dep_count  = narg ;
+        f.m_task->m_task_type  = task_base::Aggregate ;
+
+        // Assign dependences; each non-empty input gains one reference in the loop.
+        task_base * volatile * const dep =
+          f.m_task->aggregate_dependences();
+
+        for ( int i = 0 ; i < narg ; ++i ) {
+          const input_type arg_f = func(i);
+          if ( 0 != arg_f.m_task ) {
+            if ( m_queue != static_cast< queue_type * >( arg_f.m_task->m_queue ) ) {
+              Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" );
+            }
+            // Increment reference count to track subsequent assignment.
+            Kokkos::atomic_increment( &(arg_f.m_task->m_ref_count) );
+          }
+          // Always write the slot (null for an empty future) so no entry is left uninitialized.
+          dep[i] = arg_f.m_task ;
+        }
+        Kokkos::memory_fence();
+
+        m_queue->schedule_aggregate( f.m_task );
+        // this when_all may be processed at any moment
+      }
+      return f ;
+    }
+
   //----------------------------------------
 
   KOKKOS_INLINE_FUNCTION
diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp
index 1754e4a8fb5999a6baf1ddb8eec22810f4ec5238..47b105cfdccc9bb83cc961f50bd082f2e0ccdf9f 100644
--- a/lib/kokkos/core/src/Kokkos_View.hpp
+++ b/lib/kokkos/core/src/Kokkos_View.hpp
@@ -2429,6 +2429,7 @@ template < class ValueType >
 struct CommonViewAllocProp< void, ValueType >
 {
   using value_type = ValueType;
+  using scalar_array_type = ValueType;
 
   template < class ... Views >
   CommonViewAllocProp( const Views & ... ) {}
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Atomic.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Atomic.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a93f488203d7fac66daf79395ce9a2c3ab9aa01a
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Atomic.hpp
@@ -0,0 +1,439 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <hc.hpp>
+//#include <hsa_atomic.h>
+
+#ifdef KOKKOS_ENABLE_ROCM_ATOMICS
+namespace Kokkos {
+  //ROCm can do:
+  //Types int/unsigned int
+  //variants: atomic_exchange/compare_exchange/fetch_add/fetch_sub/fetch_max/fetch_min/fetch_and/fetch_or/fetch_xor/fetch_inc/fetch_dec
+  // The integer atomic_exchange overloads below forward to hc:: exchange calls and return their
+  // result; int64_t, long long and unsigned long long go through the uint64_t entry point via casts.
+  KOKKOS_INLINE_FUNCTION
+  int atomic_exchange(int* dest, const int& val) {
+    return hc::atomic_exchange_int(dest, val);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  unsigned int atomic_exchange(unsigned int* dest, const unsigned int& val) {
+    return hc::atomic_exchange_unsigned(dest, val);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  int64_t atomic_exchange(int64_t* dest, const int64_t& val) {
+    return (int64_t)hc::atomic_exchange_uint64((uint64_t*)dest, (const uint64_t&)val);  // signed storage reinterpreted as uint64_t
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  uint64_t atomic_exchange(uint64_t* dest, const uint64_t& val) {
+    return hc::atomic_exchange_uint64(dest, val);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  long long atomic_exchange(long long* dest, const long long& val) {
+    return (long long)hc::atomic_exchange_uint64((uint64_t*)dest, (const uint64_t&)val);  // assumes long long is 64 bits wide
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  unsigned long long atomic_exchange(unsigned long long* dest, const unsigned long long& val) {
+    return (unsigned long long)hc::atomic_exchange_uint64((uint64_t*)dest, (const uint64_t&)val);  // assumes unsigned long long is 64 bits wide
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  float atomic_exchange(float* dest, const float& val) {
+    union U {  // float bits viewed as the int the hc exchange operates on
+      int i ;
+      float f ;
+      KOKKOS_INLINE_FUNCTION U() {};
+    } idest,ival;
+    // No plain pre-read of *dest: the exchange itself yields the previous bit pattern.
+    ival.f = val;
+    idest.i = hc::atomic_exchange_int((int*)dest, ival.i);
+    return idest.f;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  double atomic_exchange(double* dest, const double& val) {
+    union U {  // double bits viewed as the uint64_t the hc exchange operates on
+      uint64_t i ;
+      double d ;
+      KOKKOS_INLINE_FUNCTION U() {};
+    } idest,ival;
+    // No plain pre-read of *dest: the exchange itself yields the previous bit pattern.
+    ival.d = val;
+    idest.i = hc::atomic_exchange_uint64((uint64_t*)dest, ival.i);
+    return idest.d;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  int atomic_compare_exchange(int* dest, int compare, const int& val);  // forward declaration; defined later in this file
+
+  KOKKOS_INLINE_FUNCTION
+  int64_t atomic_compare_exchange(int64_t* dest, int64_t compare, const int64_t& val);  // forward declaration; defined later in this file
+
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_exchange(T* dest, typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) {
+    // Generic exchange for any T the size of an int: swap the raw bits with the
+    // native int exchange and return the previous contents.  A single
+    // compare-exchange against a plain read of *dest is not an exchange: the
+    // store is silently dropped whenever *dest changes between read and swap.
+    union U {
+      int i ;
+      T t ;
+      KOKKOS_INLINE_FUNCTION U() {};
+    } oldval , newval ;
+    newval.t = val ;
+    oldval.i = hc::atomic_exchange_int( reinterpret_cast<int*>(dest) , newval.i );
+    return oldval.t ;
+  }
+
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_exchange(T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) == sizeof(int64_t), const T&>::type val) {
+    // Generic exchange for any T the size of an int64_t: swap the raw bits with
+    // the native 64-bit exchange and return the previous contents.  A single
+    // compare-exchange against a plain read of *dest is not an exchange: the
+    // store is silently dropped whenever *dest changes between read and swap.
+    union U {
+      uint64_t i ;
+      T t ;
+      KOKKOS_INLINE_FUNCTION U() {};
+    } oldval , newval ;
+
+    newval.t = val ;
+    oldval.i = hc::atomic_exchange_uint64( (uint64_t*)(dest) , newval.i );
+    return oldval.t ;
+  }
+ 
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_exchange(T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(int64_t), const T&>::type val) {
+    return val;  // NOTE(review): placeholder for other sizes -- *dest is neither read nor written here
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  int atomic_compare_exchange(int* dest, int compare, const int& val) {
+    return hc::atomic_compare_exchange_int(dest, compare, val);  // retry loops in this file treat the result as the value observed in *dest
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  unsigned int atomic_compare_exchange(unsigned int* dest, unsigned int compare, const unsigned int& val) {
+    return hc::atomic_compare_exchange_unsigned(dest, compare, val);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  int64_t atomic_compare_exchange(int64_t* dest, int64_t compare, const int64_t& val) {
+    return (int64_t) hc::atomic_compare_exchange_uint64((uint64_t*)dest, (uint64_t)compare, (const uint64_t&)val);  // signed operands passed as uint64_t bits
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  uint64_t atomic_compare_exchange(uint64_t* dest, uint64_t compare, const uint64_t& val) {
+    return hc::atomic_compare_exchange_uint64(dest, compare, val);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  long long atomic_compare_exchange(long long* dest, long long compare, const long long& val) {
+    return (long long)hc::atomic_compare_exchange_uint64((uint64_t*)(dest), (uint64_t)(compare), (const uint64_t&)(val));  // assumes long long is 64 bits wide
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  float atomic_compare_exchange(float* dest, float compare, const float& val) {
+    union U {  // float bits viewed as the int the hc call operates on; the comparison is on bit patterns
+      int i ;
+      float f ;
+      KOKKOS_INLINE_FUNCTION U() {};
+    } idest,icompare,ival;
+    // No plain pre-read of *dest: the compare-exchange result is the only value returned.
+    icompare.f = compare;
+    ival.f = val;
+    idest.i = hc::atomic_compare_exchange_int(reinterpret_cast<int*>(dest), icompare.i, ival.i);
+    return idest.f;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  double atomic_compare_exchange(double* dest, double compare, const double& val) {
+    union U {  // double bits viewed as the uint64_t the hc call operates on; the comparison is on bit patterns
+      uint64_t i ;
+      double d ;
+      KOKKOS_INLINE_FUNCTION U() {};
+    } idest,icompare,ival;
+    // No plain pre-read of *dest: the compare-exchange result is the only value returned.
+    icompare.d = compare;
+    ival.d = val;
+    idest.i = hc::atomic_compare_exchange_uint64(reinterpret_cast<uint64_t*>(dest), icompare.i, ival.i);
+    return idest.d;
+  }
+
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_compare_exchange(volatile T* dest, T compare, typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) {
+    union Bits {                 // T viewed as the int the hc call operates on
+      int raw ;
+      T value ;
+      KOKKOS_INLINE_FUNCTION Bits() {};
+    } found , expected , desired ;
+    found.value    = *dest ;     // plain read; replaced by the compare-exchange result below
+    expected.value = compare ;
+    desired.value  = val ;
+    found.raw = hc::atomic_compare_exchange_int( (int*)(dest) , expected.raw , desired.raw );
+    return found.value ;
+  }
+
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_compare_exchange(volatile T* dest, T compare, typename std::enable_if<sizeof(T) == sizeof(int64_t), const T&>::type val) {
+    union Bits {                 // T viewed as the uint64_t the hc call operates on
+      uint64_t raw ;
+      T value ;
+      KOKKOS_INLINE_FUNCTION Bits() {};
+    } found , expected , desired ;
+    found.value    = *dest ;     // plain read; replaced by the compare-exchange result below
+    expected.value = compare ;
+    desired.value  = val ;
+    found.raw = hc::atomic_compare_exchange_uint64( (uint64_t*)(dest) , expected.raw , desired.raw );
+    return found.value ;
+  }
+
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_compare_exchange(volatile T* dest, T compare, typename std::enable_if<(sizeof(T) != sizeof(int32_t)) && (sizeof(T) != sizeof(int64_t)), const T&>::type val) {
+    return val;  // NOTE(review): placeholder for other sizes -- no comparison or store is performed on *dest
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  int atomic_fetch_add (volatile int * dest, const int& val) {
+    return hc::atomic_fetch_add((int *)dest, val);  // cast drops the volatile qualifier for the hc call
+  }
+  
+  KOKKOS_INLINE_FUNCTION
+  unsigned int atomic_fetch_add(unsigned int* dest, const unsigned int& val) {
+    return hc::atomic_fetch_add(dest, val);  // NOTE(review): dest is not volatile-qualified here, unlike the sibling overloads
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  unsigned long atomic_fetch_add(volatile unsigned long* dest, const unsigned long& val) {
+    return (unsigned long)hc::atomic_fetch_add((uint64_t *)dest, (const uint64_t)val);  // assumes unsigned long is 64 bits wide
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  int64_t atomic_fetch_add(volatile int64_t* dest, const int64_t& val) {
+    return (int64_t)hc::atomic_fetch_add((uint64_t *)dest, (const uint64_t&)val);  // signed operand passed as uint64_t bits
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  char atomic_fetch_add(volatile char * dest, const char& val) {
+    // Byte add via CAS on the aligned 32-bit word holding *dest (little-endian lanes assumed); returns the old byte.
+    unsigned int * const word = (unsigned int *)( (uint64_t)dest & ~(uint64_t)3 );
+    const unsigned int shift = (unsigned int)( (uint64_t)dest & 3 ) * 8 , mask = 0xffu << shift ;
+    unsigned int assume , newval , oldval = *word ;
+    do {
+      assume = oldval ;
+      newval = ( assume & ~mask ) | ( ( ( ( assume >> shift ) + (unsigned int)val ) << shift ) & mask ) ;
+      oldval = hc::atomic_compare_exchange_unsigned( word , assume , newval );
+    } while ( assume != oldval );
+    return (char)( ( oldval >> shift ) & 0xffu ) ;
+  }
+
+
+  KOKKOS_INLINE_FUNCTION
+  short atomic_fetch_add(volatile short * dest, const short& val) {
+    // 16-bit add via CAS on the aligned 32-bit word holding *dest (little-endian, 2-byte-aligned dest assumed); returns the old value.
+    unsigned int * const word = (unsigned int *)( (uint64_t)dest & ~(uint64_t)3 );
+    const unsigned int shift = (unsigned int)( (uint64_t)dest & 2 ) * 8 , mask = 0xffffu << shift ;
+    unsigned int assume , newval , oldval = *word ;
+    do {
+      assume = oldval ;
+      newval = ( assume & ~mask ) | ( ( ( ( assume >> shift ) + (unsigned int)val ) << shift ) & mask ) ;
+      oldval = hc::atomic_compare_exchange_unsigned( word , assume , newval );
+    } while ( assume != oldval );
+    return (short)( ( oldval >> shift ) & 0xffffu ) ;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  long long atomic_fetch_add(volatile long long * dest, const long long& val) {
+    return (long long)hc::atomic_fetch_add((uint64_t*)dest, (const uint64_t&)val);  // assumes long long is 64 bits wide; operand passed as uint64_t bits
+  }
+
+
+
+  KOKKOS_INLINE_FUNCTION
+  int atomic_fetch_sub (volatile int * dest, const int& val) {
+    return hc::atomic_fetch_sub((int *)dest, val);  // cast drops the volatile qualifier for the hc call
+  }
+  
+  KOKKOS_INLINE_FUNCTION
+  unsigned int atomic_fetch_sub(volatile unsigned int* dest, const unsigned int& val) {
+    return hc::atomic_fetch_sub((unsigned int *)dest, val);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  int64_t atomic_fetch_sub(int64_t* dest, const int64_t& val) {  // NOTE(review): dest is not volatile-qualified, unlike the long long overload
+    return (int64_t)hc::atomic_fetch_add((uint64_t *)dest, -(const uint64_t&)val);
+    // Subtraction is performed as a 64-bit fetch_add of the negated (modulo 2^64) operand.
+  }
+  
+  KOKKOS_INLINE_FUNCTION
+  char atomic_fetch_sub(volatile char * dest, const char& val) {
+    // Byte subtract via CAS on the aligned 32-bit word holding *dest (little-endian lanes assumed); returns the old byte.
+    unsigned int * const word = (unsigned int *)( (uint64_t)dest & ~(uint64_t)3 );
+    const unsigned int shift = (unsigned int)( (uint64_t)dest & 3 ) * 8 , mask = 0xffu << shift ;
+    unsigned int assume , newval , oldval = *word ;
+    do {
+      assume = oldval ;
+      newval = ( assume & ~mask ) | ( ( ( ( assume >> shift ) - (unsigned int)val ) << shift ) & mask ) ;
+      oldval = hc::atomic_compare_exchange_unsigned( word , assume , newval );
+    } while ( assume != oldval );
+    return (char)( ( oldval >> shift ) & 0xffu ) ;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  short atomic_fetch_sub(volatile short * dest, const short& val) {
+    // 16-bit subtract via CAS on the aligned 32-bit word holding *dest (little-endian, 2-byte-aligned dest assumed); returns the old value.
+    unsigned int * const word = (unsigned int *)( (uint64_t)dest & ~(uint64_t)3 );
+    const unsigned int shift = (unsigned int)( (uint64_t)dest & 2 ) * 8 , mask = 0xffffu << shift ;
+    unsigned int assume , newval , oldval = *word ;
+    do {
+      assume = oldval ;
+      newval = ( assume & ~mask ) | ( ( ( ( assume >> shift ) - (unsigned int)val ) << shift ) & mask ) ;
+      oldval = hc::atomic_compare_exchange_unsigned( word , assume , newval );
+    } while ( assume != oldval );
+    return (short)( ( oldval >> shift ) & 0xffffu ) ;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  long long atomic_fetch_sub(volatile long long * dest, const long long& val) {
+    return (long long)hc::atomic_fetch_add((uint64_t*)dest, -(const uint64_t&)val);  // subtraction as fetch_add of the negated (modulo 2^64) operand
+  }
+
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_fetch_add(volatile T* dest, typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) {
+    // Generic fetch-add for any T the size of an int: retry a compare-exchange on
+    // the raw bits until the observed word equals the expected one; returns the old value.
+    union Bits {
+      unsigned int raw ;
+      T value ;
+      KOKKOS_INLINE_FUNCTION Bits() {};
+    } expected , seen , sum ;
+    seen.value = *dest ;
+    for (;;) {
+      expected.raw = seen.raw ;
+      sum.value = expected.value + val ;
+      seen.raw = atomic_compare_exchange( (unsigned int*)(dest) , expected.raw , sum.raw );
+      if ( expected.raw == seen.raw ) break ;
+    }
+    return seen.value ;
+  }
+
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_fetch_add(volatile T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) == sizeof(int64_t), const T&>::type val) {
+    // Generic fetch-add for any T the size of an int64_t: retry a compare-exchange on
+    // the raw bits until the observed word equals the expected one; returns the old value.
+    union Bits {
+      uint64_t raw ;
+      T value ;
+      KOKKOS_INLINE_FUNCTION Bits() {};
+    } expected , seen , sum ;
+    seen.value = *dest ;
+    for (;;) {
+      expected.raw = seen.raw ;
+      sum.value = expected.value + val ;
+      seen.raw = atomic_compare_exchange( (uint64_t*)dest , expected.raw , sum.raw );
+      if ( expected.raw == seen.raw ) break ;
+    }
+    return seen.value ;
+  }
+
+
+  //WORKAROUND: sizes other than int/int64_t -- *dest is neither read nor modified and val is returned as-is
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_fetch_add(volatile T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(int64_t), const T&>::type val) {
+    return val ;
+  }
+
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_fetch_sub(volatile T* dest, typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) {
+    // Generic fetch-sub for any T the size of an int; returns the old value.
+    // val is taken by const reference (like the sibling overloads) so rvalue and const operands bind.
+    union U {
+      int i ;
+      T t ;
+      KOKKOS_INLINE_FUNCTION U() {};
+    } assume , oldval , newval ;
+    oldval.t = *dest ;
+    // Retry until the word observed by the compare-exchange equals the one the difference was computed from.
+    do {
+      assume.i = oldval.i ;
+      newval.t = assume.t - val ;
+      oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
+    } while ( assume.i != oldval.i );
+    return oldval.t ;
+  }
+
+  template<class T>
+  KOKKOS_INLINE_FUNCTION
+  T atomic_fetch_sub(volatile T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) == sizeof(int64_t), const T&>::type val) {
+    // Generic fetch-sub for any T the size of an int64_t: retry a compare-exchange on
+    // the raw bits until the observed word equals the expected one; returns the old value.
+    union Bits {
+      int64_t raw ;
+      T value ;
+      KOKKOS_INLINE_FUNCTION Bits() {};
+    } expected , seen , diff ;
+    seen.value = *dest ;
+    for (;;) {
+      expected.raw = seen.raw ;
+      diff.value = expected.value - val ;
+      seen.raw = atomic_compare_exchange( (int64_t*)dest , expected.raw , diff.raw );
+      if ( expected.raw == seen.raw ) break ;
+    }
+    return seen.value ;
+  }
+}
+#endif
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..83b5792a6495a70607c9832a653cfdfa7e6f968d
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp
@@ -0,0 +1,51 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef GUARD_CORE_KOKKOS_ROCM_CONFIG_HPP
+#define GUARD_CORE_KOKKOS_ROCM_CONFIG_HPP
+// KOKKOS_ROCM_HAS_WORKAROUNDS defaults to 1; a definition made before this header is included is left untouched.
+#ifndef KOKKOS_ROCM_HAS_WORKAROUNDS
+#define KOKKOS_ROCM_HAS_WORKAROUNDS 1
+#endif
+
+#endif // GUARD_CORE_KOKKOS_ROCM_CONFIG_HPP
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.cpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e919d35903d2da15522738aa9870862dd6c0c065
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.cpp
@@ -0,0 +1,133 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_ROCMEXEC_HPP
+#define KOKKOS_ROCMEXEC_HPP
+
+#include <algorithm>
+#include <typeinfo>
+#include <Kokkos_Macros.hpp>
+//#include <ROCm/Kokkos_ROCmExec.hpp>
+#include <hc.hpp>
+
+#define ROCM_SPACE_ATOMIC_MASK      0x1FFFF
+#define ROCM_SPACE_ATOMIC_XOR_MASK  0x15A39
+#define ROCM_CONCURRENCY 20480
+//#define ROCM_CONCURRENCY 81920  # for fiji
+
+namespace Kokkos {
+  static int rocm_space_atomic_locks[ROCM_SPACE_ATOMIC_MASK+1];  // file-local; zeroed by init_rocm_atomic_lock_array()
+  static int rocm_space_scratch_locks[ROCM_CONCURRENCY];  // file-local; zeroed by init_rocm_scratch_lock_array()
+  static int rocm_space_threadid_locks[ROCM_CONCURRENCY];  // file-local; zeroed by init_rocm_threadid_lock_array()
+namespace Impl {
+// TODO: mimic cuda implementation, add dgpu capability
+
+  // Sets every element of rocm_space_atomic_locks to 0 on the first call only;
+  // a function-local static flag turns each later call into a no-op.
+  void init_rocm_atomic_lock_array() {
+    static int already_cleared = 0;
+    if(already_cleared) return;
+    int * const first = rocm_space_atomic_locks;
+    std::fill(first, first + (ROCM_SPACE_ATOMIC_MASK+1), 0);
+    already_cleared = 1;
+  }
+
+  // Sets every element of rocm_space_scratch_locks to 0 on the first call only;
+  // a function-local static flag turns each later call into a no-op.
+  void init_rocm_scratch_lock_array() {
+    static int already_cleared = 0;
+    if(already_cleared) return;
+    int * const first = rocm_space_scratch_locks;
+    std::fill(first, first + ROCM_CONCURRENCY, 0);
+    already_cleared = 1;
+  }
+
+  // Sets every element of rocm_space_threadid_locks to 0 on the first call
+  // only; a function-local static flag turns each later call into a no-op.
+  void init_rocm_threadid_lock_array() {
+    static int already_cleared = 0;
+    if(already_cleared) return;
+    int * const first = rocm_space_threadid_locks;
+    std::fill(first, first + ROCM_CONCURRENCY, 0);
+    already_cleared = 1;
+  }
+
+  void init_lock_arrays_rocm_space() {  // entry point declared in Kokkos_ROCm_Exec.hpp
+     init_rocm_atomic_lock_array();  // the only lock array initialized here
+//     init_rocm_scratch_lock_array();   (disabled: scratch lock array is not initialized by this call)
+//     init_rocm_threadid_lock_array();  (disabled: thread-id lock array is not initialized by this call)
+  }
+}
+
+} // namespace Kokkos
+#if 0 // compiled out; functions with the same names and bodies are defined in Kokkos_ROCm_Exec.hpp
+namespace Kokkos {
+namespace Impl {
+KOKKOS_INLINE_FUNCTION
+bool lock_address_rocm_space(void* ptr) {
+#if 0
+return(Kokkos::Impl::lock_address_host_space(ptr));
+#else
+  size_t offset = size_t(ptr);
+  offset = offset >> 2;
+  offset = offset & ROCM_SPACE_ATOMIC_MASK;
+  return (0 == hc::atomic_compare_exchange(&rocm_space_atomic_locks[offset],0,1));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+void unlock_address_rocm_space(void* ptr) {
+#if 0
+Kokkos::Impl::unlock_address_host_space(ptr) ;
+#else
+  size_t offset = size_t(ptr);
+  offset = offset >> 2;
+  offset = offset & ROCM_SPACE_ATOMIC_MASK;
+  hc::atomic_exchange( &rocm_space_atomic_locks[ offset ], 0);
+#endif
+}
+
+}
+} // namespace Kokkos
+#endif
+
+#endif /* #ifndef KOKKOS_ROCMEXEC_HPP */
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..48a27eb11d1f02632f9f1f74772eac9bbb874f2d
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp
@@ -0,0 +1,137 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_ROCMEXEC_HPP
+#define KOKKOS_ROCMEXEC_HPP
+
+#include <algorithm>
+#include <typeinfo>
+
+#if defined(__HCC_ACCELERATOR__) // when this macro is defined, every printf(...) call expands to nothing
+#define printf(...)
+#endif
+
+namespace Kokkos {
+namespace Impl {
+// Compile-time constants describing the ROCm execution model used by this backend.
+struct ROCmTraits {
+// TODO: determine if needed
+  enum { WavefrontSize       = 64 /* 64  */ };
+  enum { WorkgroupSize       = 64 /* 64  */ };
+  // Mask and shift must agree with WavefrontSize: 0x3f == 64 - 1 and 64 == 1 << 6.
+  enum { WavefrontIndexMask  = 0x003f  /* WavefrontSize - 1 */ };
+  enum { WavefrontIndexShift = 6       /* WavefrontSize == 1 << WavefrontIndexShift */ };
+  enum { SharedMemoryBanks    = 32      /* Compute device 2.0 */ };
+  enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ };
+  enum { SharedMemoryUsage    = 0x04000 /* 16k shared / 48k L1 Cache */ };
+  enum { UpperBoundExtentCount    = 65535 /* Hard upper bound */ };
+#if 0
+  KOKKOS_INLINE_FUNCTION static
+  ROCmSpace::size_type wavefront_count( ROCmSpace::size_type i )
+    { return ( i +  WavefrontIndexMask ) >>  WavefrontIndexShift ; }
+
+  KOKKOS_INLINE_FUNCTION static
+  ROCmSpace::size_type wavefront_align( ROCmSpace::size_type i )
+    {
+      enum { Mask = ~ROCmSpace::size_type(  WavefrontIndexMask ) };
+      return ( i +  WavefrontIndexMask ) & Mask ;
+    }
+#endif
+};
+size_t rocm_internal_cu_count();  // NOTE(review): Kokkos_ROCm_Impl.cpp defines this returning ROCm::size_type - confirm the types agree
+size_t rocm_internal_maximum_workgroup_count();  // NOTE(review): Kokkos_ROCm_Impl.cpp defines rocm_internal_maximum_extent_size, not this name - confirm
+// Scratch accessors; Kokkos_ROCm_Impl.cpp spells their parameter and return types as ROCm::size_type.
+size_t * rocm_internal_scratch_flags( const size_t size );
+size_t * rocm_internal_scratch_space( const size_t size );
+
+}
+} // namespace Kokkos
+#define ROCM_SPACE_ATOMIC_MASK      0x1FFFF  // ANDed with (address >> 2) below to pick a lock slot
+#define ROCM_SPACE_ATOMIC_XOR_MASK  0x15A39  // not referenced anywhere in this header
+//int rocm_space_atomic_locks[ROCM_SPACE_ATOMIC_MASK+1];
+extern int  // NOTE(review): Kokkos_ROCm_Exec.cpp defines a static array of this name inside namespace Kokkos, not this global pointer - confirm a definition exists
+   *rocm_space_atomic_locks;
+
+namespace Kokkos {
+namespace Impl {
+  void init_lock_arrays_rocm_space();  // defined in Kokkos_ROCm_Exec.cpp
+
+  void* rocm_resize_scratch_space(size_t bytes, bool force_shrink = false);  // no definition in Kokkos_ROCm_Exec.cpp or Kokkos_ROCm_Impl.cpp
+
+// TODO: determine if needed
+// Tries to take the lock slot that `ptr` hashes to.  The slot index is the
+// address shifted right by two bits and masked with ROCM_SPACE_ATOMIC_MASK.
+// Returns true when hc::atomic_compare_exchange(slot,0,1) compares equal to 0.
+// NOTE(review): the argument/return convention of that hc:: call is not
+// visible in this header - confirm it matches this use.
+// (A compiled-out alternative forwarded to lock_address_host_space instead.)
+KOKKOS_INLINE_FUNCTION
+bool lock_address_rocm_space(void* ptr) {
+  const size_t slot = ( size_t(ptr) >> 2 ) & ROCM_SPACE_ATOMIC_MASK;
+  return (0 == hc::atomic_compare_exchange(&rocm_space_atomic_locks[slot],0,1));
+}
+// Releases the lock slot that `ptr` hashes to (same index computation as
+// lock_address_rocm_space: address >> 2, masked with ROCM_SPACE_ATOMIC_MASK)
+// by calling hc::atomic_exchange with 0; the call's result is discarded.
+// (A compiled-out alternative forwarded to unlock_address_host_space instead.)
+KOKKOS_INLINE_FUNCTION
+void unlock_address_rocm_space(void* ptr) {
+  size_t slot = size_t(ptr) >> 2;
+  slot &= ROCM_SPACE_ATOMIC_MASK;
+
+  hc::atomic_exchange( &rocm_space_atomic_locks[ slot ], 0);
+}
+
+}
+} // namespace Kokkos
+
+namespace Kokkos {
+namespace Impl {
+//extern 
+//KOKKOS_INLINE_FUNCTION
+//void init_lock_arrays_rocm_space(); 
+
+
+}
+} // namespace Kokkos
+
+#endif /* #ifndef KOKKOS_ROCMEXEC_HPP */
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1322391d926122a5b54f2835ee0e6828571b0a4d
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp
@@ -0,0 +1,753 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+/*--------------------------------------------------------------------------*/
+/* Kokkos interfaces */
+
+#include <Kokkos_Core.hpp>
+
+/* only compile this file if ROCM is enabled for Kokkos */
+#ifdef KOKKOS_ENABLE_ROCM
+
+//#include <ROCm/Kokkos_ROCm_Internal.hpp>
+#include <impl/Kokkos_Error.hpp>
+#include <Kokkos_ROCmSpace.hpp>
+#include <ROCm/Kokkos_ROCm_Exec.hpp>
+
+/*--------------------------------------------------------------------------*/
+/* Standard 'C' libraries */
+#include <stdlib.h>
+
+/* Standard 'C++' libraries */
+#include <vector>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+
+
+//KOKKOS_INLINE_FUNCTION
+// Kokkos::Impl::ROCmLockArraysStruct kokkos_impl_rocm_lock_arrays ;
+
+
+/*--------------------------------------------------------------------------*/
+namespace Kokkos {
+namespace Impl {
+
+#if 0 // compiled out: none of the helpers in this region are built
+namespace {
+__global__
+void query_rocm_kernel_arch( int * d_arch )
+{
+#if defined( __HCC_ACCELERATOR__ )
+  *d_arch = OCM_ARCH__ ;  // NOTE(review): identifier looks truncated - confirm the intended macro before enabling
+#else
+  *d_arch = 0 ;
+#endif
+}
+
+/** Query what compute capability is actually launched to the device: */
+int rocm_kernel_arch()
+{
+  int * d_arch = 0 ;
+  rocmMalloc( (void **) & d_arch , sizeof(int) );
+  query_rocm_kernel_arch<<<1,1>>>( d_arch );
+  int arch = 0 ;
+  rocmMemcpy( & arch , d_arch , sizeof(int) , rocmMemcpyDefault );
+  rocmFree( d_arch );
+  return arch ;
+}
+bool rocm_launch_blocking()
+{
+  const char * env = getenv("ROCM_LAUNCH_BLOCKING");
+
+  if (env == 0) return false;
+
+  return atoi(env);  // any non-zero integer in the variable converts to true
+}
+
+}
+#endif
+
+// true device memory allocation, not visible from host.
+// Forwards `size` to hc::am_alloc with a default-constructed hc::accelerator
+// and flag 0; the returned pointer is passed back without any failure check.
+void * rocm_device_allocate(int size)
+{
+  hc::accelerator target;
+  return hc::am_alloc(size,target,0);
+}
+
+// host pinned allocation
+// flag = 1, non-coherent, host resident, but with gpu address space pointer
+// flag = 2, coherent, host resident, but with host address space pointer
+// This routine always passes flag 2, with a default-constructed accelerator,
+// and hands back whatever hc::am_alloc returns without a failure check.
+void * rocm_hostpinned_allocate(int size)
+{
+  hc::accelerator target;
+  return hc::am_alloc(size,target,2);
+}
+// same free used by all rocm memory allocations: forwards ptr to hc::am_free
+void rocm_device_free(void * ptr)  // counterpart of rocm_device_allocate / rocm_hostpinned_allocate
+{
+  hc::am_free(ptr);  // any result of am_free is ignored
+}
+
+
+// Creates a marker on the default view of a default-constructed
+// hc::accelerator and waits on the completion future it returns.
+KOKKOS_INLINE_FUNCTION
+void rocm_device_synchronize()
+{
+   hc::accelerator().get_default_view().create_marker().wait();
+}
+
+void rocm_internal_error_throw( const char * name, const char * file, const int line )  // currently a no-op: the whole body is compiled out
+{
+#if 0  // disabled; it uses an error value `e` that is not declared in this function
+  std::ostringstream out ;
+  out << name << " error( " << rocmGetErrorName(e) << "): " << rocmGetErrorString(e);
+  if (file) {
+    out << " " << file << ":" << line;
+  }
+  throw_runtime_exception( out.str() );
+#endif
+}
+
+//----------------------------------------------------------------------------
+// Some significant rocm device properties:
+//
+// rocmDeviceProp::name                : Text label for device
+// rocmDeviceProp::major               : Device major number
+// rocmDeviceProp::minor               : Device minor number
+// rocmDeviceProp::workgroupSize       : number of threads per workgroup
+// rocmDeviceProp::multiProcessorCount : number of multiprocessors
+// rocmDeviceProp::sharedMemPerBlock   : capacity of shared memory per wavefront
+// rocmDeviceProp::totalConstMem       : capacity of constant memory
+// rocmDeviceProp::totalGlobalMem      : capacity of global memory
+// rocmDeviceProp::maxGridSize[3]      : maximum grid size
+
+//
+//
+// the data we have available from a ROCm accelerator
+// std::wstring get_device_path()
+// std::wstring get_description()
+// unsigned int get_version()
+// bool get_has_display()
+// size_t get_dedicated_memory()
+// bool get_supports_double_precision()
+// bool get_supports_limited_double_precision()
+// bool get_is_debug()
+// bool get_supports_cpu_shared_memory()
+// size_t get_max_tile_static_size()
+// unsigned int get_cu_count()
+// bool has_cpu_accessible_am() 
+struct rocmDeviceProp {  // one record per accelerator, filled by rocmGetDeviceProperties
+   char name[256];  // multibyte copy of get_device_path()
+   char description[256];  // multibyte copy of get_description()
+   unsigned int version;  // get_version()
+   int device_type;  // packed digits name[3..5]; assigned only when name starts with "gfx"
+   int device_ordinal;  // 0, or name[6]-0x30 when name starts with "gfx"
+   int major;  // version>>16, or name[3]-0x30 when name starts with "gfx"
+   int minor;  // version&0xff, or name[5]-0x30 when name starts with "gfx"
+   size_t totalGlobalMem;  // get_dedicated_memory()
+   size_t sharedMemPerWavefront;  // get_max_tile_static_size()
+   int WavefrontSize;  // always set to 64
+   int WorkgroupSize;  // always set to 256
+   int MaxTileCount;  // always set to 409600
+   int maxThreadsPerWorkgroup;  // always set to 1024
+   int multiProcessorCount;  // get_cu_count()
+   int canMapHostMemory;  // get_supports_cpu_shared_memory()
+   bool APU;  // true when major.minor is 7.0 or 8.1
+};
+
+
+
+// Converts `text` to multibyte chars in `dst` (capacity bytes) with wctomb,
+// skipping unconvertible characters, truncating if needed, always '\0'-ending.
+static void rocm_narrow_copy(const std::wstring & text, char * dst, const size_t capacity)
+{
+   size_t used = 0;
+   for(wchar_t c: text) {
+      if(used + MB_CUR_MAX >= capacity) break; // keep room for one char and '\0'
+      const int n = std::wctomb(&dst[used],c);
+      if(n>0) used += n;
+   }
+   dst[used] = '\0';
+}
+
+// Fills *devProp from the hc::accelerator at position `device` of get_all().
+// *devProp is zeroed first, so fields not assigned below (device_type for a
+// non-"gfx" name) are 0; a `device` outside the list leaves it all zeros.
+void rocmGetDeviceProperties(struct rocmDeviceProp* devProp, int device)
+{
+   *devProp = rocmDeviceProp();
+   hc::accelerator acc;
+   std::vector<hc::accelerator> accv = acc.get_all() ;
+   if(device < 0 || size_t(device) >= accv.size()) return;
+   hc::accelerator a = accv[device];
+   rocm_narrow_copy(a.get_device_path(), devProp->name, sizeof(devProp->name));
+   rocm_narrow_copy(a.get_description(), devProp->description, sizeof(devProp->description));
+   /* assume a CPU */
+   devProp->version = a.get_version();
+   devProp->major = a.get_version()>>16; // for CPU, these are meaningless
+   devProp->minor = a.get_version()&0xff;
+   devProp->device_ordinal = 0;
+
+   /* is this an AMD graphics card */
+   if((devProp->name[0]=='g') && (devProp->name[1]=='f') && (devProp->name[2]=='x')) {
+   /* for AMD cards, the name has the format gfxMmmO */
+      devProp->device_type    = ((devProp->name[3]-0x30)<<16)
+                              + ((devProp->name[4]-0x30)<<8)
+                              +  (devProp->name[5]-0x30);
+      devProp->device_ordinal = devProp->name[6]-0x30;
+      devProp->major = devProp->name[3]-0x30;
+      devProp->minor = devProp->name[5]-0x30;
+   }
+   devProp->totalGlobalMem = a.get_dedicated_memory();
+   devProp->sharedMemPerWavefront = a.get_max_tile_static_size();
+   devProp->WavefrontSize = 64;
+   devProp->WorkgroupSize = 256; // preferred
+   devProp->MaxTileCount  = 409600; // as defined in /opt/rocm/hcc-lc/include/hsa_new.h
+   devProp->maxThreadsPerWorkgroup = 1024;
+   devProp->multiProcessorCount = a.get_cu_count();
+   devProp->canMapHostMemory = a.get_supports_cpu_shared_memory();
+// Kaveri has 64KB L2 per CU, 16KB L1, 64KB Vector Regs/SIMD, or 128 regs/thread
+// GCN has 64KB LDS per CU
+//Kaveri APU is 7:0:0, Carrizo APU is 8:0:1
+   devProp->APU = ((devProp->major==7)&&(devProp->minor==0)) ||
+                  ((devProp->major==8)&&(devProp->minor==1));
+}
+
+namespace {
+
+
+
+class ROCmInternalDevices {  // property table for every accelerator found when it is constructed
+public:
+  enum { MAXIMUM_DEVICE_COUNT = 64 };  // capacity of m_rocmProp; the constructor aborts beyond it
+  struct rocmDeviceProp  m_rocmProp[ MAXIMUM_DEVICE_COUNT ] ;  // entries [0, m_rocmDevCount) are filled
+  int                    m_rocmDevCount ;  // number of accelerators returned by get_all()
+
+  ROCmInternalDevices();
+
+  static const ROCmInternalDevices & singleton();  // shared instance, created on first use
+};
+
+// Counts the accelerators returned by get_all() on a default-constructed
+// hc::accelerator, aborts when there are more than MAXIMUM_DEVICE_COUNT, and
+// otherwise fills m_rocmProp[i] for each of them via rocmGetDeviceProperties.
+ROCmInternalDevices::ROCmInternalDevices()
+{
+  const std::vector<hc::accelerator> found = hc::accelerator().get_all() ;
+  m_rocmDevCount = found.size();
+
+  if(MAXIMUM_DEVICE_COUNT < m_rocmDevCount)
+    Kokkos::abort("Sorry, you have more GPUs per node than we thought anybody would ever have. Please report this to github.com/kokkos/kokkos.");
+  for ( int slot = 0 ; slot < m_rocmDevCount ; ++slot )
+    rocmGetDeviceProperties( &m_rocmProp[slot] , slot );
+}
+
+// Returns the shared device table, creating it with `new` on the first call.
+// No code in this file deletes the object.
+const ROCmInternalDevices & ROCmInternalDevices::singleton()
+{
+  static ROCmInternalDevices* instance = nullptr;
+  if (nullptr == instance)
+    instance = new ROCmInternalDevices();
+  return *instance;
+}
+
+}
+
+//----------------------------------------------------------------------------
+
+class ROCmInternal {  // backend state reached through singleton()
+private:
+  // Copy operations are declared private and never defined: the object is not copyable.
+  ROCmInternal( const ROCmInternal & );
+  ROCmInternal & operator = ( const ROCmInternal & );
+
+
+public:
+
+  typedef Kokkos::Experimental::ROCm::size_type size_type ;
+
+  int         m_rocmDev ;  // selected device id; -1 until initialize() succeeds
+  int         m_rocmArch ;  // starts at -1; no code in this file assigns another value
+  unsigned    m_multiProcCount ;  // multiProcessorCount of the selected device
+  unsigned    m_maxWorkgroup ;  // 5 * multiProcessorCount, set by initialize()
+  unsigned    m_maxSharedWords ;  // sharedMemPerWavefront / sizeof(size_type)
+  size_type   m_scratchSpaceCount ;  // capacity of m_scratchSpace in ScratchGrain units
+  size_type   m_scratchFlagsCount ;  // capacity of m_scratchFlags in ScratchGrain units
+  size_type * m_scratchSpace ;  // buffer handed out by scratch_space()
+  size_type * m_scratchFlags ;  // buffer handed out by scratch_flags()
+
+  static int was_finalized;  // set to 1 by finalize(); initialize() aborts once it is set
+
+  static ROCmInternal & singleton();
+
+  int verify_is_initialized( const char * const label ) const ;  // non-zero when m_rocmDev >= 0
+
+  int is_initialized() const  // true only when both scratch buffers exist
+    { return 0 != m_scratchSpace && 0 != m_scratchFlags ; }
+
+  void initialize( int rocm_device_id );
+  void finalize();
+
+  void print_configuration( std::ostream & ) const ;
+
+
+  ~ROCmInternal();
+
+  ROCmInternal()  // every member starts in the "not initialized" state
+    : m_rocmDev( -1 )
+    , m_rocmArch( -1 )
+    , m_multiProcCount( 0 )
+    , m_maxWorkgroup( 0 )
+    , m_maxSharedWords( 0 )
+    , m_scratchSpaceCount( 0 )
+    , m_scratchFlagsCount( 0 )
+    , m_scratchSpace( 0 )
+    , m_scratchFlags( 0 )
+    {}
+
+  size_type * scratch_space( const size_type size );  // grows the buffer when size exceeds its capacity
+  size_type * scratch_flags( const size_type size );  // same, and zero-fills a newly grown buffer
+};
+
+int ROCmInternal::was_finalized = 0;
+//----------------------------------------------------------------------------
+
+
+// Writes the ROCm-related macros that are defined, then one line per entry of
+// the device table; the entry whose index equals m_rocmDev is tagged Selected.
+void ROCmInternal::print_configuration( std::ostream & s ) const
+{
+  const ROCmInternalDevices & table = ROCmInternalDevices::singleton();
+#if defined( KOKKOS_ENABLE_ROCM )
+    s << "macro  KOKKOS_ENABLE_ROCM      : defined" << std::endl ;
+#endif
+#if defined( __hcc_version__ )
+    s << "macro  __hcc_version__          = " << __hcc_version__ << std::endl ;
+#endif
+  for ( int i = 0 ; i < table.m_rocmDevCount ; ++i ) {
+    const rocmDeviceProp & prop = table.m_rocmProp[i];
+    s << "Kokkos::Experimental::ROCm[ " << i << " ] " << prop.name ;
+    s << " version " << prop.major << "." << prop.minor ;
+    s << ", Total Global Memory: " << human_memory_size(prop.totalGlobalMem) ;
+    s << ", Shared Memory per Wavefront: "
+      << human_memory_size(prop.sharedMemPerWavefront);
+    if ( m_rocmDev == i ) { s << " : Selected" ; }
+    s << std::endl ;
+  }
+}
+
+//----------------------------------------------------------------------------
+
+// Reports on std::cerr when a scratch buffer is still held, i.e. finalize()
+// did not run, and then puts every member back to its constructor default.
+ROCmInternal::~ROCmInternal()
+{
+  const bool still_holding = ( 0 != m_scratchSpace ) || ( 0 != m_scratchFlags ) ;
+  if ( still_holding ) {
+    std::cerr << "Kokkos::Experimental::ROCm ERROR: Failed to call Kokkos::Experimental::ROCm::finalize()"
+              << std::endl ;
+    std::cerr.flush();
+  }
+
+  // device selection
+  m_rocmDev = m_rocmArch = -1 ;
+  // device limits
+  m_multiProcCount = m_maxWorkgroup = m_maxSharedWords = 0 ;
+  // scratch bookkeeping
+  m_scratchSpaceCount = m_scratchFlagsCount = 0 ;
+  m_scratchSpace = m_scratchFlags = 0 ;
+}
+
+int ROCmInternal::verify_is_initialized( const char * const label ) const
+{
+  if ( m_rocmDev < 0 ) {
+    std::cerr << "Kokkos::Experimental::ROCm::" << label << " : ERROR device not initialized" << std::endl ;
+  }
+  return 0 <= m_rocmDev ;
+}
+
+// Returns the single ROCmInternal object, creating it with `new` on the first
+// call.  No code in this file deletes the object.
+ROCmInternal & ROCmInternal::singleton()
+{
+  static ROCmInternal* instance = nullptr ;
+  if (nullptr == instance)
+    instance = new ROCmInternal();
+  return *instance ;
+}
+
+// Selects device `rocm_device_id` and sets up the backend state:
+//  - aborts if finalize() already ran; returns at once if already initialized;
+//  - throws if the host execution space is not initialized, if the id is
+//    outside 1 .. m_rocmDevCount-1, or if the device's major/minor check fails;
+//  - otherwise records the device limits, allocates the initial scratch
+//    buffers and initializes the atomic lock array.
+void ROCmInternal::initialize( int rocm_device_id  )
+{
+  if ( was_finalized ) Kokkos::abort("Calling ROCm::initialize after ROCm::finalize is illegal\n");
+
+  if ( is_initialized() ) return;
+
+  enum { WordSize = sizeof(size_type) };
+
+  if ( ! HostSpace::execution_space::is_initialized() ) {
+    const std::string msg("ROCm::initialize ERROR : HostSpace::execution_space is not initialized");
+    throw_runtime_exception( msg );
+  }
+
+  const ROCmInternalDevices & dev_info = ROCmInternalDevices::singleton();
+
+  const bool ok_init = 0 == m_scratchSpace || 0 == m_scratchFlags ;
+  // Accepted ids start at 1; id 0 is rejected by this test.
+  const bool ok_id   = 1 <= rocm_device_id &&
+                            rocm_device_id < dev_info.m_rocmDevCount ;
+  // Need at least a GPU device
+  const bool ok_dev = ok_id &&
+    ( 1 <= dev_info.m_rocmProp[ rocm_device_id ].major &&
+      0 <= dev_info.m_rocmProp[ rocm_device_id ].minor );
+  if ( ok_init && ok_dev ) {
+    const struct rocmDeviceProp & rocmProp =
+      dev_info.m_rocmProp[ rocm_device_id ];
+
+    m_rocmDev = rocm_device_id ;
+
+//  rocmSetDevice( m_rocmDev ) );
+    Kokkos::Impl::rocm_device_synchronize();
+
+/*
+    // Query what compute capability architecture a kernel executes:
+    m_rocmArch = rocm_kernel_arch();
+    if ( m_rocmArch != rocmProp.major * 100 + rocmProp.minor * 10 ) {
+      std::cerr << "Kokkos::Experimental::ROCm::initialize WARNING: running kernels compiled for compute capability "
+                << ( m_rocmArch / 100 ) << "." << ( ( m_rocmArch % 100 ) / 10 )
+                << " on device with compute capability "
+                << rocmProp.major << "." << rocmProp.minor
+                << " , this will likely reduce potential performance."
+                << std::endl ;
+    }
+*/
+    // number of multiprocessors
+
+    m_multiProcCount = rocmProp.multiProcessorCount ;
+
+    //----------------------------------
+    // Maximum number of wavefronts,
+    // at most one workgroup per thread in a workgroup for reduction.
+
+    m_maxSharedWords = rocmProp.sharedMemPerWavefront/ WordSize ;
+
+    //----------------------------------
+    // Maximum number of Workgroups:
+
+    m_maxWorkgroup = 5*rocmProp.multiProcessorCount;  //TODO: confirm usage and value
+
+    //----------------------------------
+    // Multiblock reduction uses scratch flags for counters
+    // and scratch space for partial reduction values.
+    // Allocate some initial space.  This will grow as needed.
+
+    {
+      const unsigned reduce_block_count = m_maxWorkgroup * Impl::ROCmTraits::WorkgroupSize ;
+
+      (void) scratch_flags( reduce_block_count * 2  * sizeof(size_type) );
+      (void) scratch_space( reduce_block_count * 16 * sizeof(size_type) );
+    }
+    //----------------------------------
+
+  }
+  else {
+    std::ostringstream msg ;
+    msg << "Kokkos::Experimental::ROCm::initialize(" << rocm_device_id << ") FAILED" ;
+
+    if ( ! ok_init ) {
+      msg << " : Already initialized" ;
+    }
+    if ( ! ok_id ) {
+      msg << " : Device identifier out of range "
+          << "[1.." << (dev_info.m_rocmDevCount-1) << "]" ;  // lower bound matches ok_id
+    }
+    else if ( ! ok_dev ) {
+      msg << " : Device " ;
+      msg << dev_info.m_rocmProp[ rocm_device_id ].major ;
+      msg << "." ;
+      msg << dev_info.m_rocmProp[ rocm_device_id ].minor ;
+      msg << " Need at least a GPU" ;
+      msg << std::endl;
+    }
+    Kokkos::Impl::throw_runtime_exception( msg.str() );
+  }
+
+  // Init the array used for arbitrarily sized atomics
+  Kokkos::Impl::init_lock_arrays_rocm_space();
+
+//  Kokkos::Impl::ROCmLockArraysStruct locks;
+//  locks.atomic = atomic_lock_array_rocm_space_ptr(false);
+//  locks.scratch = scratch_lock_array_rocm_space_ptr(false);
+//  locks.threadid = threadid_lock_array_rocm_space_ptr(false);
+//  rocmMemcpyToSymbol( kokkos_impl_rocm_lock_arrays , & locks , sizeof(ROCmLockArraysStruct) );
+}
+
+//----------------------------------------------------------------------------
+
+typedef Kokkos::Experimental::ROCm::size_type ScratchGrain[ Impl::ROCmTraits::WorkgroupSize ] ;  // one grain = WorkgroupSize elements of size_type
+enum { sizeScratchGrain = sizeof(ScratchGrain) };  // grain size in bytes; scratch buffers grow in whole grains
+
+// Host-side byte fill.  Writes `value` converted to char into each of the
+// first `size` bytes starting at `ptr`; `size` is therefore a byte count, not
+// a count of size_type elements.
+// The index has the same type as `size`, so the loop test no longer mixes an
+// `int` with size_type and the index cannot overflow before reaching `size`.
+// (A compiled-out variant ran the fill through hc parallel_for_each.)
+void rocmMemset(  Kokkos::Experimental::ROCm::size_type * ptr ,  Kokkos::Experimental::ROCm::size_type value , Kokkos::Experimental::ROCm::size_type size)
+{
+  typedef Kokkos::Experimental::ROCm::size_type size_type ;
+
+  char * const bytes = (char *) ptr;
+  const char fill = (char) value;
+
+  for ( size_type i = 0 ; i < size ; ++i )
+  {
+    bytes[i] = fill;
+  }
+}
+
+// Returns the flags scratch buffer, growing it first when its capacity
+// (m_scratchFlagsCount grains of sizeScratchGrain bytes) is below `size` bytes.
+// A grown buffer is zero-filled; the record of the buffer it replaces is
+// released here, where it previously was leaked.  Pointers obtained before a
+// growing call must not be used afterwards.
+Kokkos::Experimental::ROCm::size_type *
+ROCmInternal::scratch_flags( const Kokkos::Experimental::ROCm::size_type size )
+{
+  if ( verify_is_initialized("scratch_flags") && m_scratchFlagsCount * sizeScratchGrain < size ) {
+    typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;
+
+    m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
+    Record * const r = Record::allocate( Kokkos::HostSpace()
+                                       , "InternalScratchFlags"
+                                       , ( sizeScratchGrain  * m_scratchFlagsCount ) );
+    Record::increment( r );
+    // The replacement exists now, so drop our reference to the old buffer.
+    if ( m_scratchFlags ) Record::decrement( Record::get_record( m_scratchFlags ) );
+    m_scratchFlags = reinterpret_cast<size_type *>( r->data() );
+    rocmMemset( m_scratchFlags , 0 , m_scratchFlagsCount * sizeScratchGrain );
+  }
+  return m_scratchFlags ;
+}
+
+// Returns the reduction scratch buffer, growing it first when its capacity
+// (m_scratchSpaceCount grains of sizeScratchGrain bytes) is below `size` bytes.
+// The record of a replaced buffer is released here (it previously was leaked);
+// this function does not fill the new buffer.
+Kokkos::Experimental::ROCm::size_type *
+ROCmInternal::scratch_space( const Kokkos::Experimental::ROCm::size_type size )
+{
+  if ( verify_is_initialized("scratch_space") && m_scratchSpaceCount * sizeScratchGrain < size ) {
+    typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;
+
+    m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
+    Record * const r = Record::allocate( Kokkos::HostSpace()
+                                       , "InternalScratchSpace"
+                                       , ( sizeScratchGrain  * m_scratchSpaceCount ) );
+    Record::increment( r );
+    if ( m_scratchSpace ) Record::decrement( Record::get_record( m_scratchSpace ) );
+    m_scratchSpace = reinterpret_cast<size_type *>( r->data() );
+  }
+  return m_scratchSpace ;
+}
+
+//----------------------------------------------------------------------------
+
+// Marks the backend finalized (a later initialize() aborts).  If a scratch
+// buffer is held, releases the allocation record of each buffer that is set
+// and restores the members to their constructor defaults.
+void ROCmInternal::finalize()
+{
+  was_finalized = 1;
+  if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) {
+
+    typedef Kokkos::Experimental::Impl::SharedAllocationRecord< HostSpace > Record ;
+
+    // Check each pointer on its own: the outer test passes with only one set.
+    if ( 0 != m_scratchFlags ) Record::decrement( Record::get_record( m_scratchFlags ) );
+    if ( 0 != m_scratchSpace ) Record::decrement( Record::get_record( m_scratchSpace ) );
+
+    m_rocmDev             = -1 ;
+    m_rocmArch            = -1 ;
+    m_multiProcCount      = 0 ;
+    m_maxWorkgroup        = 0 ;
+    m_maxSharedWords      = 0 ;
+    m_scratchSpaceCount   = 0 ;
+    m_scratchFlagsCount   = 0 ;
+    m_scratchSpace        = 0 ;
+    m_scratchFlags        = 0 ;
+  }
+}
+
+//----------------------------------------------------------------------------
+
+Kokkos::Experimental::ROCm::size_type rocm_internal_cu_count()  // m_multiProcCount of the singleton (0 before initialize)
+{ return ROCmInternal::singleton().m_multiProcCount ; }
+
+Kokkos::Experimental::ROCm::size_type rocm_internal_maximum_extent_size()  // m_maxWorkgroup of the singleton (0 before initialize)
+{ return ROCmInternal::singleton().m_maxWorkgroup ; }
+
+Kokkos::Experimental::ROCm::size_type rocm_internal_maximum_shared_words()  // m_maxSharedWords of the singleton (0 before initialize)
+{ return ROCmInternal::singleton().m_maxSharedWords ; }
+
+Kokkos::Experimental::ROCm::size_type * rocm_internal_scratch_space( const Kokkos::Experimental::ROCm::size_type size )  // forwards to ROCmInternal::scratch_space on the singleton
+{ return ROCmInternal::singleton().scratch_space( size ); }
+
+Kokkos::Experimental::ROCm::size_type * rocm_internal_scratch_flags( const Kokkos::Experimental::ROCm::size_type size )  // forwards to ROCmInternal::scratch_flags on the singleton
+{ return ROCmInternal::singleton().scratch_flags( size ); }
+
+
+
+} // namespace Impl
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+
+//ROCm::size_type ROCm::detect_device_count()
+//{ return Impl::ROCmInternalDevices::singleton().m_rocmDevCount ; }
+
+int ROCm::concurrency() {  // constant chosen at compile time by KOKKOS_ARCH_KAVERI
+#if defined(KOKKOS_ARCH_KAVERI) 
+  return 8*64*40;  // 20480 kaveri
+#else
+  return 32*64*40;  // 81920 fiji and hawaii (was 32*8*40 == 10240, contradicting this figure)
+#endif
+}
+int ROCm::is_initialized()  // non-zero when the internal singleton holds both scratch buffers
+{ return Kokkos::Impl::ROCmInternal::singleton().is_initialized(); }
+
+void ROCm::initialize( const ROCm::SelectDevice config )  // sets up the internal singleton for config.rocm_device_id
+{
+  Kokkos::Impl::ROCmInternal::singleton().initialize( config.rocm_device_id );
+
+  #if defined(KOKKOS_ENABLE_PROFILING)
+    Kokkos::Profiling::initialize();  // only when KOKKOS_ENABLE_PROFILING is defined
+  #endif
+}
+
+#if 0 // compiled out: neither function in this region is built
+std::vector<unsigned>
+ROCm::detect_device_arch()
+{
+  const Impl::ROCmInternalDevices & s = Impl::ROCmInternalDevices::singleton();
+
+  std::vector<unsigned> output( s.m_rocmDevCount );
+
+  for ( int i = 0 ; i < s.m_rocmDevCount ; ++i ) {
+    output[i] = s.m_rocmProp[i].major * 100 + s.m_rocmProp[i].minor ;
+  }
+
+  return output ;
+}
+
+ROCm::size_type ROCm::device_arch()
+{
+  return 1 ;
+}
+#endif
+
+void ROCm::finalize()  // tears down the internal singleton; initialize() aborts after this
+{
+  Kokkos::Impl::ROCmInternal::singleton().finalize();
+
+  #if defined(KOKKOS_ENABLE_PROFILING)
+    Kokkos::Profiling::finalize();  // only when KOKKOS_ENABLE_PROFILING is defined
+  #endif
+}
+
+ROCm::ROCm()
+  : m_device( Kokkos::Impl::ROCmInternal::singleton().m_rocmDev )  // copies the selected device id (-1 if none)
+{
+  Kokkos::Impl::ROCmInternal::singleton().verify_is_initialized( "ROCm instance constructor" );  // logs to std::cerr if no device is selected; the result is ignored
+}
+
+// True when table entry `device` is flagged APU; ids outside [0, m_rocmDevCount), e.g. -1 for "no device", give false.
+bool ROCm::isAPU(int device) {
+  const Kokkos::Impl::ROCmInternalDevices & dev_info = Kokkos::Impl::ROCmInternalDevices::singleton();
+  return 0 <= device && device < dev_info.m_rocmDevCount && dev_info.m_rocmProp[device].APU;
+}
+
+bool ROCm::isAPU() {  // same query, for the id returned by rocm_device()
+  return ROCm::isAPU(rocm_device());  
+}
+
+//ROCm::ROCm( const int instance_id )
+//  : m_device( Impl::ROCmInternal::singleton().m_rocmDev )
+//{}
+
+void ROCm::print_configuration( std::ostream & s , const bool )  // the unnamed bool argument is ignored
+{ Kokkos::Impl::ROCmInternal::singleton().print_configuration( s ); }
+
+bool ROCm::sleep() { return false ; }  // always false; no other action is taken
+
+bool ROCm::wake() { return true ; }  // always true; no other action is taken
+
+void ROCm::fence()  // returns after rocm_device_synchronize() has waited on its marker
+{
+  Kokkos::Impl::rocm_device_synchronize();
+}
+
+const char* ROCm::name() { return "ROCm"; }  // fixed label for this execution space
+
+} // namespace Experimental
+} // namespace Kokkos
+
+#endif // KOKKOS_ENABLE_ROCM
+//----------------------------------------------------------------------------
+
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Invoke.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Invoke.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..481e7df3a431d8f68cbff760abdf8475ae1df2df
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Invoke.hpp
@@ -0,0 +1,138 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <type_traits>
+#include <Kokkos_Macros.hpp>
+
+#if !defined( KOKKOS_ROCM_INVOKE_H )
+#define KOKKOS_ROCM_INVOKE_H
+
+namespace Kokkos {
+namespace Impl {
+
+// Tagged dispatch: Tag is a real type, so a default-constructed Tag is passed
+// as the first argument of f, followed by the forwarded arguments.
+template<class Tag, class F, class... Ts,
+         typename std::enable_if< !std::is_void<Tag>::value, int >::type = 0>
+KOKKOS_INLINE_FUNCTION void rocm_invoke(F&& f, Ts&&... xs)
+{
+  f(Tag(), static_cast<Ts&&>(xs)...);
+}
+
+// Untagged dispatch: Tag is void, so f receives only the forwarded arguments.
+template<class Tag, class F, class... Ts,
+         typename std::enable_if< std::is_void<Tag>::value, int >::type = 0>
+KOKKOS_INLINE_FUNCTION void rocm_invoke(F&& f, Ts&&... xs)
+{
+  f(static_cast<Ts&&>(xs)...);
+}
+
+
+// Callable wrapper that remembers a functor by address and calls it through
+// rocm_invoke<Tag>, so a work tag (if any) is prepended on every call.
+//
+// The wrapper does not own the functor: the referenced object must outlive
+// the wrapper.
+template<class F, class Tag=void>
+struct rocm_invoke_fn
+{
+    // Address of the wrapped functor (not owned).
+    F* f;
+
+    // Annotated like the factory make_rocm_invoke_fn that calls it and like
+    // the rocm_capture_fn constructor in this header.
+    KOKKOS_INLINE_FUNCTION rocm_invoke_fn(F& f_) : f(&f_)
+    {}
+
+    // Forwards all arguments to the wrapped functor via rocm_invoke<Tag>.
+    template<class... Ts>
+    KOKKOS_INLINE_FUNCTION void operator()(Ts&&... xs) const
+    {
+        rocm_invoke<Tag>(*f, static_cast<Ts&&>(xs)...);
+    }
+};
+
+// Factory for rocm_invoke_fn: Tag is given explicitly, F is deduced from the
+// argument. The result refers to f and does not copy it.
+template<class Tag, class F>
+KOKKOS_INLINE_FUNCTION rocm_invoke_fn<F, Tag> make_rocm_invoke_fn(F& f)
+{
+    return rocm_invoke_fn<F, Tag>(f);
+}
+
+// Identity case: a plain object is handed back as-is.
+template<class T>
+KOKKOS_INLINE_FUNCTION T& rocm_unwrap(T& x)
+{
+    return x;
+}
+
+// std::reference_wrapper case: hand back the wrapped reference.
+// NOTE(review): std::reference_wrapper is declared in <functional>, which this
+// header does not include directly - confirm it arrives transitively.
+template<class T>
+KOKKOS_INLINE_FUNCTION T& rocm_unwrap(std::reference_wrapper<T> x)
+{
+    return x.get();
+}
+
+// Binds one value to the front of a callable's argument list: calling the
+// object with (xs...) calls f(rocm_unwrap(data), xs...).
+template<class F, class T>
+struct rocm_capture_fn
+{
+    F f;     // wrapped callable, stored by value
+    T data;  // bound leading argument, stored by value
+
+    KOKKOS_INLINE_FUNCTION rocm_capture_fn(F callable, T bound)
+    : f(callable)
+    , data(bound)
+    {}
+
+    // Calls the wrapped callable with the bound value first, then the
+    // forwarded call arguments.
+    template<class... Ts>
+    KOKKOS_INLINE_FUNCTION void operator()(Ts&&... xs) const
+    {
+        f(rocm_unwrap(data), static_cast<Ts&&>(xs)...);
+    }
+};
+
+// Base case: bind a single value x in front of f's arguments.
+template<class F, class T>
+KOKKOS_INLINE_FUNCTION rocm_capture_fn<F, T> rocm_capture(F f, T x)
+{
+    return rocm_capture_fn<F, T>(f, x);
+}
+
+// Recursive case: bind x first, then bind the remaining values (y, xs...) on
+// top of that result, one nesting level per bound value.
+template<class F, class T, class U, class... Ts>
+KOKKOS_INLINE_FUNCTION auto rocm_capture(F f, T x, U y, Ts... xs) -> decltype(rocm_capture(rocm_capture(f, x), y, xs...))
+{
+    return rocm_capture(rocm_capture(f, x), y, xs...);
+}
+
+// Function object that calls its first argument with the remaining
+// (forwarded) arguments: rocm_apply_op{}(f, xs...) evaluates f(xs...).
+struct rocm_apply_op
+{
+    template<class F, class... Ts>
+    KOKKOS_INLINE_FUNCTION void operator()(F&& f, Ts&&... xs) const
+    {
+        f(static_cast<Ts&&>(xs)...);
+    }
+};
+
+}}
+
+#endif
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Join.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Join.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d3d150703ed3bc4352f56f3294bffe1134cf0dad
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Join.hpp
@@ -0,0 +1,72 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#if !defined( KOKKOS_ROCM_JOIN_H )
+#define KOKKOS_ROCM_JOIN_H
+
+namespace Kokkos {
+namespace Impl {
+
+
+// Adaptor that exposes Joiner::join as a binary callable, so a ValueJoin-style
+// joiner can be used with standard algorithms.
+template<class Joiner, class F>
+struct join_operator
+{
+  // Functor passed to Joiner::join on every call (not owned).
+  const F* fp;
+
+  // Joins `rhs` into the by-value copy `accum` through Joiner::join and
+  // returns that copy.
+  template<class T, class U>
+  T operator()(T accum, const U& rhs) const
+  {
+    Joiner::join(*fp, &accum, &rhs);
+    return accum;
+  }
+};
+
+// Factory for join_operator: Joiner is given explicitly, F is deduced. The
+// result stores the address of f, so f must outlive it.
+template<class Joiner, class F>
+join_operator<Joiner, F> make_join_operator(const F& f)
+{
+  join_operator<Joiner, F> op = { &f };
+  return op;
+}
+
+}}
+
+#endif
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..14ab52a1c20aca9bc0d846dbd47a5744f81c029c
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp
@@ -0,0 +1,1265 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <algorithm>
+#include <typeinfo>
+#include <ROCm/Kokkos_ROCm_Reduce.hpp>
+#include <ROCm/Kokkos_ROCm_Scan.hpp>
+#include <ROCm/Kokkos_ROCm_Vectorization.hpp>
+
+
+namespace Kokkos {
+namespace Impl {
+
+struct ROCmTeamMember ;
+
+template< class ... Properties >
+class TeamPolicyInternal< Kokkos::Experimental::ROCm, Properties ... >: public PolicyTraits<Properties ...> {
+private:
+  int m_league_size ;
+  int m_team_size ;
+  int m_vector_length ;
+  int m_team_scratch_size[2] ;
+  int m_thread_scratch_size[2] ;
+  int m_chunk_size ;
+
+
+public:
+
+  using execution_policy = TeamPolicyInternal ;
+  using execution_space  = Kokkos::Experimental::ROCm ;
+  typedef PolicyTraits<Properties ... > traits;
+
+  // Member-wise copy assignment; returns *this.
+  TeamPolicyInternal& operator = (const TeamPolicyInternal& p) {
+    m_league_size   = p.m_league_size;
+    m_team_size     = p.m_team_size;
+    m_vector_length = p.m_vector_length;
+    // Both scratch levels (indices 0 and 1) are copied.
+    for (int level = 0; level < 2; ++level) {
+      m_team_scratch_size[level]   = p.m_team_scratch_size[level];
+      m_thread_scratch_size[level] = p.m_thread_scratch_size[level];
+    }
+    m_chunk_size = p.m_chunk_size;
+    return *this;
+  }
+
+  // Default policy: empty league, team size 0, vector length 0, no scratch,
+  // chunk size 64.
+  TeamPolicyInternal()
+    : m_league_size( 0 )
+    , m_team_size( 0 )
+    , m_vector_length( 0 )
+    , m_team_scratch_size {0,0}
+    , m_thread_scratch_size {0,0}
+    , m_chunk_size ( 64 )
+   {}
+
+  // Explicit league and team size. The vector length is set to 1 (the value
+  // the three-argument overload used as its default) so that it is never left
+  // uninitialized.
+  TeamPolicyInternal( const int arg_league_size
+            , const int arg_team_size )
+    : m_league_size( arg_league_size )
+    , m_team_size( arg_team_size )
+    , m_vector_length( 1 )
+    , m_team_scratch_size {0,0}
+    , m_thread_scratch_size {0,0}
+    , m_chunk_size ( 64 )
+    {}
+
+  // Explicit league size, team size and vector length.
+  // The third parameter carries no default argument: with a default, a
+  // two-argument call would be ambiguous with the overload above. Two-argument
+  // calls now select that overload, which uses the same vector length of 1.
+  TeamPolicyInternal( const int arg_league_size
+            , const int arg_team_size
+            , const int vector_length_request)
+    : m_league_size( arg_league_size )
+    , m_team_size( arg_team_size )
+    , m_vector_length (vector_length_request)
+    , m_team_scratch_size {0,0}
+    , m_thread_scratch_size {0,0}
+    , m_chunk_size ( 64 )
+    {}
+
+  // Automatic team size: m_team_size is stored as -1, which the team_size()
+  // accessors treat as "not chosen". Vector length is 1.
+  TeamPolicyInternal( const int arg_league_size
+            , const Kokkos::AUTO_t )
+    : m_league_size( arg_league_size )
+    , m_team_size( -1 )
+    , m_vector_length( 1 )
+    , m_team_scratch_size {0,0}
+    , m_thread_scratch_size {0,0}
+    , m_chunk_size ( 64 )
+    {}
+
+  // Automatic team size (stored as -1) with an explicit vector length.
+  TeamPolicyInternal( const int arg_league_size
+            , const Kokkos::AUTO_t
+            , const int vector_length_request)
+    : m_league_size( arg_league_size )
+    , m_team_size( -1 )
+    , m_vector_length (vector_length_request)
+    , m_team_scratch_size {0,0}
+    , m_thread_scratch_size {0,0}
+    , m_chunk_size ( 64 )
+    {}
+
+  /** \brief current chunk size */
+  inline int chunk_size() const { return m_chunk_size ; }
+
+  /** \brief return a copy of this policy whose chunk size is chunk_size_;
+   *         the policy it is called on is left unchanged */
+  KOKKOS_INLINE_FUNCTION TeamPolicyInternal set_chunk_size(typename traits::index_type chunk_size_) const {
+    TeamPolicyInternal updated( *this );
+    updated.m_chunk_size = chunk_size_;
+    return updated;
+  }
+
+  /** \brief return a copy of this policy with the per-team scratch size of
+   *         the given scratch level replaced */
+  inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team) const {
+    TeamPolicyInternal updated( *this );
+    updated.m_team_scratch_size[level] = per_team.value;
+    return updated;
+  }
+
+  /** \brief return a copy of this policy with the per-thread scratch size of
+   *         the given scratch level replaced */
+  inline TeamPolicyInternal set_scratch_size(const int& level, const PerThreadValue& per_thread) const {
+    TeamPolicyInternal updated( *this );
+    updated.m_thread_scratch_size[level] = per_thread.value;
+    return updated;
+  }
+
+  /** \brief return a copy of this policy with both the per-team and the
+   *         per-thread scratch size of the given scratch level replaced */
+  inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team, const PerThreadValue& per_thread) const {
+    // Built from the two single-value overloads above.
+    return set_scratch_size(level, per_team).set_scratch_size(level, per_thread);
+  }
+
+// TODO:  evaluate proper team_size_max requirements
+  // Currently identical to team_size_recommended(functor).
+  template< class Functor_Type>
+  KOKKOS_INLINE_FUNCTION static
+  int team_size_max( const Functor_Type & functor)
+  {
+    typedef typename Kokkos::Impl::FunctorValueTraits<Functor_Type, void>::value_type value_type;
+    // return std::min(Kokkos::Impl::get_max_tile_size() / sizeof(value_type), Kokkos::Impl::get_max_tile_thread());
+    return team_size_recommended(functor);
+  }
+
+  // Recommended team size: the tile size reported by get_tile_size for the
+  // functor's value_type.
+  template< class Functor_Type>
+  KOKKOS_INLINE_FUNCTION static int team_size_recommended(const Functor_Type & functor)
+  {
+    typedef typename Kokkos::Impl::FunctorValueTraits<Functor_Type, void>::value_type functor_value_type;
+    return Kokkos::Impl::get_tile_size<functor_value_type>();
+  }
+
+  // Recommended team size for a given vector length: the single-argument
+  // recommendation divided by vector_length, but never less than 1.
+  template< class Functor_Type >
+  KOKKOS_INLINE_FUNCTION static int team_size_recommended(const Functor_Type &functor, const int vector_length)
+  {
+    const int per_vector = team_size_recommended( functor )/vector_length;
+    return (per_vector < 1) ? 1 : per_vector;
+  }
+
+  // Team size for functor f: the stored value when it is positive, otherwise
+  // the recommendation for f.
+  template<class F>
+  KOKKOS_INLINE_FUNCTION int team_size(const F& f) const {
+    if (m_team_size > 0) return m_team_size;
+    return team_size_recommended(f);
+  }
+
+  // Team size without a functor: the stored value when it is positive,
+  // otherwise Impl::get_max_tile_thread().
+  KOKKOS_INLINE_FUNCTION int team_size() const {
+    if (m_team_size > 0) return m_team_size;
+    return Impl::get_max_tile_thread();
+  }
+
+  KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
+
+
+  inline int vector_length()   const { return m_vector_length ; }
+
+  // Scratch size of one team at `level`: the per-team size plus the
+  // per-thread size times the team size. A negative team_size_ selects the
+  // stored m_team_size.
+  inline int scratch_size(int level, int team_size_ = -1) const {
+    const int threads = (team_size_ < 0) ? m_team_size : team_size_;
+    return m_team_scratch_size[level] + threads*m_thread_scratch_size[level];
+  }
+
+  // Per-team scratch size stored for `level`.
+  inline size_t team_scratch_size(int level) const {
+    return m_team_scratch_size[level];
+  }
+
+  // Per-thread scratch size stored for `level`.
+  inline size_t thread_scratch_size(int level) const {
+    return m_thread_scratch_size[level];
+  }
+
+  typedef Impl::ROCmTeamMember member_type;
+};
+
+  struct ROCmTeamMember {
+    typedef Kokkos::Experimental::ROCm                             execution_space ;
+    typedef Kokkos::ScratchMemorySpace<Kokkos::Experimental::ROCm> scratch_memory_space ;
+
+    // Scratch-space handles. Each accessor returns whatever
+    // m_team_shared.set_team_thread_mode(level, a, b) returns:
+    //  - team_shmem():          level 0, arguments (1, 0)
+    //  - team_scratch(level):   given level, arguments (1, 0)
+    //  - thread_scratch(level): given level, arguments (team_size(), team_rank())
+    // NOTE(review): the meaning of the last two arguments is defined by
+    // ScratchMemorySpace, which is not visible here - confirm there.
+    KOKKOS_INLINE_FUNCTION
+    const scratch_memory_space & team_shmem() const 
+      { return m_team_shared.set_team_thread_mode(0,1,0); }
+    KOKKOS_INLINE_FUNCTION
+    const execution_space::scratch_memory_space & team_scratch(const int& level) const
+      { return m_team_shared.set_team_thread_mode(level,1,0) ; }
+    KOKKOS_INLINE_FUNCTION
+    const execution_space::scratch_memory_space & thread_scratch(const int& level) const
+      { return m_team_shared.set_team_thread_mode(level,
+                                             team_size(),
+                                             team_rank()) ; }
+
+
+    /* Rank of this team within the league of teams (tile index) */
+    KOKKOS_INLINE_FUNCTION int league_rank() const { return m_idx.tile[0]; }
+    /* Number of teams in the league */
+    KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size; }
+    /* Rank of this thread within this team: local index divided by the vector length */
+    KOKKOS_INLINE_FUNCTION int team_rank() const { return m_idx.local[0] / m_vector_length; }
+    /* Rank of this vector lane within its thread: local index modulo the vector length */
+    KOKKOS_INLINE_FUNCTION int vector_rank() const { return m_idx.local[0] % m_vector_length; }
+    /* Raw local index within the tile */
+    KOKKOS_INLINE_FUNCTION int lindex() const { return m_idx.local[0]; }
+    /* Raw global index */
+    KOKKOS_INLINE_FUNCTION int gindex() const { return m_idx.global[0]; }
+    /* Raw tile index (same value as league_rank()) */
+    KOKKOS_INLINE_FUNCTION int tindex() const { return m_idx.tile[0]; }
+    /* Tile dimension reported by the tiled index */
+    KOKKOS_INLINE_FUNCTION int tile_dim() const { return m_idx.tile_dim[0]; }
+    /* Team size this member was constructed with */
+    KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size; }
+    /* Vector length this member was constructed with */
+    KOKKOS_INLINE_FUNCTION int vector_length() const { return m_vector_length; }
+
+
+    // Member without scratch memory: m_team_shared is built from
+    // (nullptr, 0) and the vector length is fixed to 1.
+    KOKKOS_INLINE_FUNCTION
+    ROCmTeamMember( const hc::tiled_index< 1 > & arg_idx, int league_size_,int team_size_ )
+      : m_league_size( league_size_ )
+      , m_team_size( team_size_ )
+      , m_team_shared( nullptr, 0 )
+      , m_vector_length( 1 )
+      , m_idx( arg_idx )
+      {}
+
+    // Member with scratch memory.
+    //   shared / shsize / scratch_size0 : base pointer of the first buffer and
+    //       the two sizes whose sum is the stride between consecutive tiles
+    //   scratch_ptr / scratch_size1     : base pointer of the second buffer
+    //       and its per-tile stride
+    //   vector_length                   : stored in m_vector_length
+    // Each base pointer is advanced by this member's tile index times the
+    // corresponding stride; the size passed alongside it is stride times
+    // league_size_.
+    KOKKOS_INLINE_FUNCTION
+    ROCmTeamMember( const hc::tiled_index< 1 > & arg_idx, int league_size_,int team_size_, char * shared,  std::size_t shsize, std::size_t scratch_size0, char * scratch_ptr, std::size_t scratch_size1, std::size_t vector_length)
+      : m_league_size( league_size_ )
+      , m_team_size( team_size_ )
+      , m_team_shared( shared +  
+                          arg_idx.tile[0]*(shsize+scratch_size0), 
+                       (shsize+scratch_size0)*league_size_, 
+                       scratch_ptr + arg_idx.tile[0]*scratch_size1, 
+                       scratch_size1*league_size_)
+      , m_vector_length( vector_length )
+      , m_idx( arg_idx )
+      {}
+
+    // Waits on the barrier of this member's tiled index.
+    KOKKOS_INLINE_FUNCTION
+    void team_barrier() const {
+      m_idx.barrier.wait();
+    }
+
+    // Broadcast within the team: the thread whose team_rank() equals
+    // thread_id publishes its `value` through a tile_static slot, and after
+    // the team barrier every caller's `value` is overwritten with it.
+    //
+    // `value` is taken by non-const reference because it is the output of the
+    // broadcast; with a const reference the final assignment cannot compile.
+    //
+    // NOTE(review): there is no barrier between zero_init() and the
+    // publishing store, nor after the final read - confirm this is safe for
+    // teams spanning more than one wavefront and for back-to-back broadcasts.
+    template<class ValueType>
+    KOKKOS_INLINE_FUNCTION
+    void team_broadcast(ValueType& value, const int& thread_id ) const 
+    {
+      static_assert(std::is_trivially_default_constructible<ValueType>(), "Only trivial constructible types can be broadcasted");
+      tile_static ValueType local_value;
+      zero_init(local_value);
+      if (this->team_rank() == thread_id) {
+        local_value = value;
+      }
+      this->team_barrier();
+      value = local_value;
+    }
+// Reduce across a team of threads.
+//
+// Each thread has vector_length elements.
+// This reduction is for TeamThreadRange operations, where the range
+// is spread across threads.  Effectively, there are vector_length
+// independent reduction operations.
+// This is different from a reduction across the elements of a thread,
+// which reduces every vector element.
+
+    // Tree reduction over the threads of a team through a tile_static buffer.
+    //   value : this caller's contribution
+    //   op_in : join operation, wrapped in JoinLambdaAdapter
+    // Returns buffer[0] after the final barrier.
+    // NOTE(review): every caller receives buffer[0], and the remainder pass
+    // runs only on the thread with local index 0 - confirm that callers with
+    // vector_rank != 0 are meant to receive that same value.
+    // NOTE(review): the buffer holds 512 entries and is indexed by the local
+    // index - confirm that team_size * vector_length never exceeds it.
+    template< class ValueType, class JoinOp >
+    KOKKOS_INLINE_FUNCTION
+    ValueType team_reduce( const ValueType & value , const JoinOp & op_in) const
+    {
+      typedef JoinLambdaAdapter<ValueType,JoinOp> JoinOpFunctor ;
+      const JoinOpFunctor op(op_in);
+
+      tile_static ValueType buffer[512];
+      const auto local = lindex();
+      const auto team  = team_rank();
+      auto vector_rank = local%m_vector_length;
+      // thread_base is computed but not used in this function.
+      auto thread_base = team*m_vector_length;
+
+      // Number of thread slots covered by the tree phase below; slots at or
+      // beyond `size` are folded in by the remainder pass.
+      const std::size_t size = next_pow_2(m_team_size+1)/2;
+      // Stage 1: every caller stores its value at its own local index.
+#if defined(ROCM15)
+      buffer[local] = value;
+#else
+        // ROCM 1.5 handles address spaces better, previous version didn't
+      lds_for(buffer[local], [&](ValueType& x)
+      {
+          x = value;
+      });
+#endif
+      m_idx.barrier.wait();
+
+      // Stage 2: strides 1, 2, 4, ... below `size`. A thread whose slot
+      // 2*s*team is below `size` joins thread slot (index+s) into thread slot
+      // index, at this caller's vector_rank. One barrier per stride.
+      for(std::size_t s = 1; s < size; s *= 2)
+      {
+          const std::size_t index = 2 * s * team;
+          if (index < size)
+          {
+#if defined(ROCM15)
+                op.join(buffer[vector_rank+index*m_vector_length],
+                        buffer[vector_rank+(index+s)*m_vector_length]);
+#else
+              lds_for(buffer[vector_rank+index*m_vector_length], [&](ValueType& x)
+              {
+                  lds_for(buffer[vector_rank+(index+s)*m_vector_length],
+                                [&](ValueType& y)
+                  {
+                      op.join(x, y);
+                  });
+              });
+#endif
+          }
+          m_idx.barrier.wait();
+      }
+
+      // Stage 3: the thread with local index 0 joins the thread slots from
+      // `size` up to m_team_size into its own entry.
+      if (local == 0)
+      {
+          for(int i=size*m_vector_length; i<m_team_size*m_vector_length; i+=m_vector_length)
+#if defined(ROCM15)
+              op.join(buffer[vector_rank], buffer[vector_rank+i]);
+#else
+              lds_for(buffer[vector_rank], [&](ValueType& x)
+              {
+                  lds_for(buffer[vector_rank+i],
+                                [&](ValueType& y)
+                  {
+                      op.join(x, y);
+                  });
+              });
+#endif
+      }
+      m_idx.barrier.wait();
+
+      return buffer[0];
+    }
+
+
+    /** \brief  Intra-team vector reduce 
+     *          with intra-team non-deterministic ordering accumulation.
+     *
+     *  The intra-team accumulation value will, at the end of the
+     *  league's parallel execution, be the reduction's total.
+     *  Parallel execution ordering of the league's teams is non-deterministic.
+     *  As such the base value for each team's vector reduce operation is
+     *  similarly non-deterministic.
+     */
+    // Tree reduction over the vector lanes of one thread through a
+    // tile_static buffer.
+    //   value : this lane's contribution
+    //   op_in : join operation, wrapped in JoinLambdaAdapter
+    // Returns buffer[thread_base], the first-lane entry of the calling thread.
+    template< class ValueType, class JoinOp >
+    KOKKOS_INLINE_FUNCTION
+    ValueType thread_reduce( const ValueType & value , const JoinOp & op_in) const
+    {
+      typedef JoinLambdaAdapter<ValueType,JoinOp> JoinOpFunctor ;
+      const JoinOpFunctor op(op_in);
+
+      const auto local = m_idx.local[0];
+      tile_static ValueType buffer[512];
+      const std::size_t size = m_vector_length; //vector length must be power of 2
+      auto vector_rank = local%m_vector_length;
+      // First buffer entry belonging to the calling thread.
+      auto thread_base = team_rank()*m_vector_length;
+      // Every lane stores its value at its own local index.
+      lds_for(buffer[local], [&](ValueType& x)
+      {
+          x = value;
+      });
+      m_idx.barrier.wait();
+      // Strides 1, 2, 4, ... below the vector length: a lane whose slot
+      // 2*s*vector_rank is below `size` joins entry (index+s) into entry
+      // index, both relative to thread_base. One barrier per stride.
+      for(std::size_t s = 1; s < size; s *= 2)
+      {
+          const std::size_t index = 2 * s * vector_rank;
+          if (index < size)
+          {
+#if defined(ROCM15)
+              op.join(buffer[thread_base+index], buffer[thread_base+index+s]);
+#else
+
+              lds_for(buffer[thread_base+index], [&](ValueType& x)
+              {
+                  lds_for(buffer[thread_base+index+s], [&](ValueType& y)
+                  {
+                      op.join(x, y);
+                  });
+              });
+#endif
+          }
+          m_idx.barrier.wait();
+      }
+
+      m_idx.barrier.wait();
+      return buffer[thread_base];
+    }
+
+    /** \brief  Intra-team exclusive prefix sum with team_rank() ordering
+     *          with intra-team non-deterministic ordering accumulation.
+     *
+     *  The global inter-team accumulation value will, at the end of the
+     *  league's parallel execution, be the scan's total.
+     *  Parallel execution ordering of the league's teams is non-deterministic.
+     *  As such the base value for each team's scan operation is similarly
+     *  non-deterministic.
+     */
+    // Team scan through a double-buffered tile_static array.
+    //   value        : this caller's contribution
+    //   global_accum : optional accumulator; when non-null, the value that
+    //                  atomic_fetch_add returns for it is added to every
+    //                  caller's result
+    // The first preprocessor branch (#if 0) is compiled out; the #else branch
+    // is the live implementation.
+    // NOTE(review): sarray holds 256+1 entries per row and is indexed by the
+    // local index + 1 - confirm the local index never exceeds 255.
+    template< typename Type >
+    KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum = nullptr ) const
+    {
+  #if 0
+      const auto local = m_idx.local[0];
+      const auto last = m_team_size - 1;
+      const auto init = 0;
+      tile_static Type buffer[256];
+
+      if (local == last) buffer[0] = init;
+      else buffer[local] = value;
+
+      m_idx.barrier.wait();
+
+      for(std::size_t s = 1; s < m_team_size; s *= 2)
+      {
+          if (local >= s) buffer[local] += buffer[local - s];
+          m_idx.barrier.wait();
+      }
+
+      if ( global_accum )
+      { 
+         if(local == last)
+         {
+            atomic_fetch_add(global_accum, buffer[local] + value);
+         }
+         m_idx.barrier.wait();
+         buffer[local] += *global_accum;
+      }
+      m_idx.barrier.wait();
+      return buffer[local];
+#else
+      // Two rows that alternate between source (`toggle`) and destination
+      // (`_toggle`) on every stride. Slot 0 of each row holds 0; the value of
+      // local index lid lives in slot lid+1.
+      tile_static Type sarray[2][256+1];
+      int lid = m_idx.local[0];
+      int lp1 = lid+1;
+
+      int toggle = 1;
+      int _toggle = 0;
+      m_idx.barrier.wait();
+
+      if(lid == 0) 
+      {
+         sarray[1][0] = 0;
+         sarray[0][0] = 0;
+      }
+      sarray[1][lp1] = value;
+
+      m_idx.barrier.wait();
+      // Strides 1, 2, 4, ... below the team size: each slot adds the slot
+      // `stride` positions below it when one exists, otherwise it is copied
+      // unchanged into the destination row. The rows swap after every stride.
+      for(int stride = 1; stride < m_team_size; stride*=2)
+      {
+         if(lid >= stride)
+         {
+            sarray[_toggle][lp1] =
+                          sarray[toggle][lp1]+sarray[toggle][lp1-stride];
+         }
+         else
+         {
+            sarray[_toggle][lp1] = sarray[toggle][lp1];
+         }
+         toggle = _toggle;
+         _toggle = 1-toggle;
+         m_idx.barrier.wait();
+      }
+
+      if ( global_accum )
+      { 
+         // The caller owning the last slot adds that slot's value to
+         // *global_accum and replaces the slot with what atomic_fetch_add
+         // returned; after the barrier every caller adds it to the slot it
+         // is about to return.
+         if(m_team_size == lp1)
+         {
+            sarray[toggle][m_team_size] = atomic_fetch_add(global_accum,sarray[toggle][m_team_size]);
+         }
+         m_idx.barrier.wait();
+         sarray[toggle][lid] += sarray[toggle][m_team_size];
+      }
+      m_idx.barrier.wait();
+      // Slot lid is the one just below this caller's own slot (lid+1).
+      return sarray[toggle][lid];
+#endif
+    }
+
+  private:
+    // Number of teams, as passed to the constructor.
+    int m_league_size ;
+    // Threads per team, as passed to the constructor.
+    int m_team_size ;
+    // Scratch-memory handle returned (after a mode switch) by team_shmem(),
+    // team_scratch() and thread_scratch().
+    const scratch_memory_space  m_team_shared;
+
+  public:
+    // Divisor/modulus that splits the local index into team_rank() and
+    // vector_rank().
+    int m_vector_length;
+    // Tiled index of this work item (local, global and tile coordinates plus
+    // the tile barrier).
+    hc::tiled_index<1> m_idx;
+  };
+}
+} // namespace Kokkos
+#include <ROCm/Kokkos_ROCm_ReduceScan.hpp>
+
+namespace Kokkos {
+namespace Impl {
+
+//----------------------------------------------------------------------------
+
+// parallel_for over a RangePolicy on the ROCm backend. All work is launched
+// and waited for inside the constructor; execute() is a no-op.
+template< class FunctorType , class... Traits >
+class ParallelFor< FunctorType
+                 , Kokkos::RangePolicy< Traits... >, Kokkos::Experimental::ROCm >
+{
+private:
+
+  typedef Kokkos::RangePolicy< Traits... > Policy ;
+
+public:
+
+  // Runs f(i) (with the policy's work tag prepended, if any) for every i in
+  // [policy.begin(), policy.end()) and returns once the kernel has finished.
+  inline
+  ParallelFor( const FunctorType & f
+             , const Policy      & policy )
+    {
+      const auto work_count  = policy.end()-policy.begin();
+      const auto first_index = policy.begin();
+
+      // Nothing to launch for an empty range.
+      if(work_count == 0) return;
+
+      // define a lambda to work around a compiler issue.  The compiler does
+      // not properly dereference f inside the pfe.
+      auto run_iteration = [=](size_t i)
+      {
+        rocm_invoke<typename Policy::work_tag>(f, i);
+      };
+
+#if __hcc_workweek__ > 16600
+      hc::parallel_for_each(hc::extent<1>(work_count) , [=](const hc::index<1> & idx) [[hc]]  [[hc_max_workgroup_dim(1024,1,1)]]
+#else
+      hc::parallel_for_each(hc::extent<1>(work_count).tile(256) , [=](const hc::index<1> & idx) [[hc]]
+#endif
+      {
+        // workaround for Carrizo (and Fiji?): skip indices past the range.
+        if(idx[0]<work_count)
+        {
+          run_iteration(idx[0] + first_index);
+        }
+      }).wait();
+    }
+
+  KOKKOS_INLINE_FUNCTION
+  void execute() const {}
+
+};
+
+//----------------------------------------------------------------------------
+
+// parallel_for over a TeamPolicy on the ROCm backend. All work is launched
+// and waited for inside the constructor; execute() is a no-op.
+template< class F , class... Traits >
+class ParallelFor< F
+                 , Kokkos::TeamPolicy< Traits... >
+                 , Kokkos::Experimental::ROCm >
+{
+  using Policy = Kokkos::Impl::TeamPolicyInternal< Kokkos::Experimental::ROCm, Traits... >;
+  typedef Kokkos::Impl::FunctorValueTraits<F, typename Policy::work_tag> ValueTraits;
+
+public:
+  // Allocates the per-league device buffers, launches one tile of
+  // team_size * vector_length work items per team, waits for completion and
+  // frees the buffers again.
+  inline
+  ParallelFor( const F & f
+             , const Policy      & policy )
+    {
+      const auto league_size  = policy.league_size();
+      const auto team_size    = policy.team_size();
+      const int vector_length = policy.vector_length();
+      const int scratch_size0 = policy.scratch_size(0,team_size);
+      const int scratch_size1 = policy.scratch_size(1,team_size);
+      const auto total_size   = league_size * team_size * vector_length;
+
+      // Nothing to launch.
+      if(total_size == 0) return;
+
+      const auto shared_size = FunctorTeamShmemSize< F >::value( f , team_size );
+
+      // First buffer: team shared memory plus level-0 scratch for every team.
+      char * shared = (char *)rocm_device_allocate(shared_size * league_size +
+                                                   scratch_size0*league_size);
+
+      // Second buffer: level-1 scratch, allocated only when requested.
+      const bool has_level1_scratch = scratch_size1 > 0;
+      char * scratch = nullptr;
+      if(has_level1_scratch)
+      {
+        scratch = (char *)rocm_device_allocate(scratch_size1*league_size);
+      }
+
+      hc::extent< 1 > flat_extent( total_size );
+      hc::tiled_extent< 1 > team_extent = flat_extent.tile(team_size*vector_length);
+
+      hc::parallel_for_each( team_extent , [=](hc::tiled_index<1> idx) [[hc]]
+      {
+        rocm_invoke<typename Policy::work_tag>(f, typename Policy::member_type(idx, league_size, team_size, shared, shared_size, scratch_size0, scratch, scratch_size1,vector_length));
+      }).wait();
+
+      if(has_level1_scratch)
+      {
+        rocm_device_free(scratch);
+      }
+      rocm_device_free(shared);
+    }
+
+  KOKKOS_INLINE_FUNCTION
+  void execute() const {}
+
+};
+
+
+//----------------------------------------------------------------------------
+
+// parallel_reduce over a RangePolicy on the ROCm backend. The reduction is
+// enqueued from the constructors; execute() is a no-op.
+template< class FunctorType , class ReducerType, class... Traits >
+class ParallelReduce<
+  FunctorType , Kokkos::RangePolicy< Traits... >, ReducerType, Kokkos::Experimental::ROCm >
+{
+public:
+
+  typedef Kokkos::RangePolicy< Traits... > Policy ;
+
+  // Per-work-item adaptor handed to reduce_enqueue: calls the functor with
+  // the iteration index for work items that fall inside the range.
+  //
+  // `offset` is the first index of the range (policy.begin()). It is added
+  // to the zero-based global work-item index so that a range starting at a
+  // non-zero begin is iterated as [begin, end), the same way the RangePolicy
+  // ParallelFor does. A value-initialized invoke_fn{} has offset 0.
+  // TODO: Use generic lambdas instead
+  struct invoke_fn
+  {
+    typename Policy::member_type offset;
+
+    template<class F, class... Ts>
+    KOKKOS_INLINE_FUNCTION void operator()(std::size_t size, F&& f, hc::tiled_index<1> idx, tile_desc td, Ts&&... xs) const
+    {
+      auto global = idx.global[0];
+      // Work items at or past `size` do nothing.
+      if (global < size) f(global + offset, static_cast<Ts&&>(xs)...);
+    }
+  };
+
+  // Reduction into a result view (no reducer object).
+  // An empty range only initializes the result (when the view has data) and
+  // returns without launching anything.
+  template< class ViewType >
+  inline
+  ParallelReduce( const FunctorType  & f,
+                  const Policy       & policy,
+                  const ViewType & result_view,
+                  typename std::enable_if<
+                               Kokkos::is_view< ViewType >::value &&
+                              !Kokkos::is_reducer_type<ReducerType>::value
+                  ,void*>::type = NULL)
+    {
+      typedef typename Policy::work_tag Tag;
+      typedef Kokkos::Impl::FunctorValueTraits< FunctorType , Tag > ValueTraits;
+      typedef Kokkos::Impl::FunctorValueInit< FunctorType , Tag > ValueInit;
+      typedef typename ValueTraits::reference_type reference_type;
+
+      const auto total_size = policy.end() - policy.begin();
+
+      if(total_size==0) {
+        if (result_view.data()) {
+           ValueInit::init( f , result_view.data() );
+        }
+        return;
+      }
+
+      Kokkos::Impl::reduce_enqueue< Tag >
+        ( total_size 
+        , f
+        , InvalidType{}
+        , rocm_capture(invoke_fn{policy.begin()}, total_size)
+        , result_view.data()
+        , result_view.extent(0)
+        );
+    }
+
+  // Reduction through a reducer object; the result goes to reducer.view().
+  // An empty range only initializes the result (when the view has data) and
+  // returns without launching anything.
+  inline
+  ParallelReduce( const FunctorType & f,
+                  Policy       policy,
+                  const ReducerType& reducer )
+  {
+      typedef typename Policy::work_tag Tag;
+
+      typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value,                                   FunctorType, ReducerType> ReducerConditional;
+      typedef typename ReducerConditional::type ReducerTypeFwd;
+      typedef Kokkos::Impl::FunctorValueTraits< FunctorType , Tag > ValueTraits;
+      typedef Kokkos::Impl::FunctorValueInit< ReducerType, Tag > ValueInit ;
+
+      typedef typename ValueTraits::reference_type reference_type;
+
+      const auto total_size = policy.end() - policy.begin();
+
+      if(total_size==0) {
+        if (reducer.view().data()) {
+           ValueInit::init( ReducerConditional::select(f,reducer), 
+                            reducer.view().data() );
+        }
+        return;
+      }
+
+      Kokkos::Impl::reduce_enqueue< Tag >
+        ( total_size 
+        , f
+        , reducer
+        , rocm_capture(invoke_fn{policy.begin()}, total_size)
+        , reducer.view().data()
+        , reducer.view().extent(0)
+        );
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void execute() const {}
+
+};
+
+/// ParallelReduce specialization for Kokkos::TeamPolicy on the ROCm backend.
+///
+/// All work happens in the constructors: each one sizes and allocates the
+/// per-league shared/scratch buffers, passes the kernel to reduce_enqueue(),
+/// and frees the buffers once reduce_enqueue() has returned.  execute() is
+/// an empty placeholder.
+template< class FunctorType, class ReducerType, class... Traits >
+class ParallelReduce<
+   FunctorType , Kokkos::TeamPolicy< Traits... >, ReducerType, Kokkos::Experimental::ROCm >
+{
+  using Policy = Kokkos::Impl::TeamPolicyInternal< Kokkos::Experimental::ROCm, Traits... >;
+  typedef Kokkos::Impl::FunctorValueTraits<FunctorType, typename Policy::work_tag> ValueTraits;
+
+public:
+
+  /// Invoker passed (via rocm_capture) to reduce_enqueue(): builds the team
+  /// member for this work item with `create` and calls the functor `f` with
+  /// it, followed by the remaining arguments.
+  struct invoke_fn
+  {
+    template<class Create, class F, class... Ts>
+    KOKKOS_INLINE_FUNCTION void operator()(Create&& create, F&& f, hc::tiled_index<1> idx, tile_desc td, Ts&&... xs) const
+    {
+      f(create(idx, td), static_cast<Ts&&>(xs)...);
+    }
+  };
+
+  /// Reduce into a result View (no reducer object).
+  ///
+  /// \param f            user functor
+  /// \param policy       team policy supplying league size, team size,
+  ///                     vector length and scratch sizes
+  /// \param result_view  view receiving the reduced value
+  ///
+  /// When league_size * team_size is zero no kernel is launched, but a
+  /// non-null result is still set to the reduction's initial value, matching
+  /// what the RangePolicy specialization does for an empty range.
+  template< class ViewType >
+  inline
+  ParallelReduce( const FunctorType  & f,
+                  const Policy       & policy,
+                  const ViewType     & result_view,
+                typename std::enable_if<
+                  Kokkos::is_view< ViewType >::value &&
+                  !Kokkos::is_reducer_type<ReducerType>::value
+                  ,void*>::type = NULL)
+    {
+      const int league_size = policy.league_size();
+      const int team_size = policy.team_size(f);
+      const int vector_length = policy.vector_length();
+      const int scratch_size0 = policy.scratch_size(0,team_size);
+      const int scratch_size1 = policy.scratch_size(1,team_size);
+      const int total_size = league_size * team_size ;
+
+      if(total_size == 0) {
+        // Nothing to run: still hand back an initialized result instead of
+        // leaving whatever the caller's buffer happened to contain.
+        typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
+        if (result_view.ptr_on_device()) {
+          ValueInit::init( f , result_view.ptr_on_device() );
+        }
+        return;
+      }
+
+      const int shared_size = FunctorTeamShmemSize< FunctorType >::value( f , team_size );
+
+      char * shared;
+      char * scratch = NULL;
+
+      // One (shared + level-0 scratch) slab per league; level-1 scratch is
+      // allocated separately and only when requested.
+      shared = (char *)rocm_device_allocate(league_size *
+                             (shared_size + scratch_size0));
+      if(0<scratch_size1)
+        scratch = (char *)rocm_device_allocate(scratch_size1 * league_size);
+
+      // NOTE(review): this constructor reads td.team_size while the reducer
+      // constructor below reads td.tile_size -- confirm which one is intended.
+      auto create_team_member = [=](hc::tiled_index<1> idx, tile_desc td) 
+      { 
+
+        return typename Policy::member_type(idx, league_size, td.team_size, 
+                                          shared, shared_size, scratch_size0,
+                                          scratch, scratch_size1, 
+                                          vector_length); 
+      };
+
+      // NOTE(review): the extent passed here is total_size*vector_length,
+      // whereas the reducer constructor passes league_size -- confirm.
+      Kokkos::Impl::reduce_enqueue< typename Policy::work_tag >
+      ( total_size*vector_length
+        , f
+        , InvalidType{}
+        , rocm_capture(invoke_fn{}, create_team_member)
+        , result_view.ptr_on_device()
+        , result_view.dimension_0()
+        , team_size 
+        , vector_length 
+        , shared_size
+      );
+
+      if(0<scratch_size1)
+        rocm_device_free(scratch);
+      rocm_device_free(shared);
+    }
+
+  /// Reduce through a reducer object; the result is delivered in
+  /// reducer.view().
+  ///
+  /// \param f        user functor
+  /// \param policy   team policy supplying league size, team size, vector
+  ///                 length and scratch sizes
+  /// \param reducer  reducer whose view receives the reduced value
+  ///
+  /// When league_size * team_size is zero no kernel is launched, but a
+  /// non-null result is still set to the reduction's initial value.
+  inline
+  ParallelReduce( const FunctorType & f,
+                  Policy       policy,
+                  const ReducerType& reducer )
+  {
+      const int league_size = policy.league_size();
+      const int team_size = policy.team_size(f);
+      const int vector_length = policy.vector_length();
+      const int total_size = league_size * team_size;
+
+      if(total_size == 0) {
+        // Same empty-work handling as the RangePolicy specialization.
+        typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType > ReducerConditional;
+        typedef Kokkos::Impl::FunctorValueInit< typename ReducerConditional::type , typename Policy::work_tag > ValueInit ;
+        if (reducer.view().data()) {
+          ValueInit::init( ReducerConditional::select(f,reducer),
+                           reducer.view().data() );
+        }
+        return;
+      }
+
+      const int shared_size = FunctorTeamShmemSize< FunctorType >::value( f , team_size );
+      const int scratch_size0 = policy.scratch_size(0,team_size);
+      const int scratch_size1 = policy.scratch_size(1,team_size);
+
+      char * shared;
+      char * scratch = NULL;
+      shared = (char *)rocm_device_allocate((shared_size + scratch_size0) *
+                                            league_size);
+      if(0<scratch_size1)
+        scratch = (char *)rocm_device_allocate(scratch_size1 * league_size);
+
+      // NOTE(review): reads td.tile_size here but td.team_size in the View
+      // constructor above -- confirm which one is intended.
+      auto create_team_member = [=](hc::tiled_index<1> idx, tile_desc td) 
+      { 
+        return typename Policy::member_type(idx, league_size, td.tile_size, shared, shared_size, scratch_size0, scratch, scratch_size1, vector_length); 
+      };
+
+      // NOTE(review): the extent passed here is league_size, whereas the
+      // View constructor passes total_size*vector_length -- confirm.
+      Kokkos::Impl::reduce_enqueue< typename Policy::work_tag >
+      ( league_size
+        , f
+        , reducer
+        , rocm_capture(invoke_fn{}, create_team_member)
+        , reducer.view().data()
+        , reducer.view().extent(0),team_size,vector_length
+        , shared_size
+     );
+
+      if(0<scratch_size1)
+        rocm_device_free(scratch);
+      rocm_device_free(shared);
+  }
+
+  /// Empty: the constructors have already run the reduction.
+  KOKKOS_INLINE_FUNCTION
+  void execute() const {}
+
+};
+
+
+/// ParallelScan specialization for Kokkos::RangePolicy on the ROCm backend.
+template< class FunctorType , class... Traits >
+class ParallelScan< FunctorType , Kokkos::RangePolicy< Traits... >, Kokkos::Experimental::ROCm >
+{
+private:
+
+  using Policy      = Kokkos::RangePolicy< Traits... >;
+  using Tag         = typename Policy::work_tag;
+  using ValueTraits = Kokkos::Impl::FunctorValueTraits< FunctorType, Tag>;
+
+public:
+
+  /// Passes the scan of `f` over [policy.begin(), policy.end()) to
+  /// scan_enqueue; an empty range does nothing.
+  inline
+  ParallelScan( const FunctorType & f
+              , const Policy      & policy )
+  {
+    const auto count = policy.end()-policy.begin();
+
+    if(count != 0) {
+      // Each work item is identified by its global index.
+      scan_enqueue<Tag>( count
+                       , f
+                       , [](hc::tiled_index<1> idx, int, int) { return idx.global[0]; } );
+    }
+  }
+
+  /// Empty: the constructor already issued the scan.
+  KOKKOS_INLINE_FUNCTION
+  void execute() const {}
+
+};
+
+/// ParallelScan specialization for Kokkos::TeamPolicy on the ROCm backend.
+template< class FunctorType , class... Traits>
+class ParallelScan< FunctorType , Kokkos::TeamPolicy< Traits... >, Kokkos::Experimental::ROCm >
+{
+private:
+
+  using Policy = Kokkos::Impl::TeamPolicyInternal< Kokkos::Experimental::ROCm, Traits... >;
+  using Tag         = typename Policy::work_tag;
+  using ValueTraits = Kokkos::Impl::FunctorValueTraits< FunctorType, Tag>;
+
+public:
+
+  /// Passes the scan of `f` over league_size * team_size work items to
+  /// scan_enqueue; zero work items does nothing.
+  inline
+  ParallelScan( const FunctorType & f
+              , const Policy      & policy )
+  {
+    const auto leagues  = policy.league_size();
+    const auto per_team = policy.team_size(f);
+    const auto count    = leagues * per_team;
+
+    if(count != 0) {
+      // Each work item gets a team member built from its tiled index.
+      scan_enqueue<Tag>( count
+                       , f
+                       , [&](hc::tiled_index<1> idx, int n_teams, int n_leagues) { return typename Policy::member_type(idx,n_leagues,n_teams); } );
+    }
+  }
+
+  /// Empty: the constructor already issued the scan.
+  KOKKOS_INLINE_FUNCTION
+  void execute() const {}
+
+};
+
+}
+}
+
+namespace Kokkos {
+namespace Impl {
+  /// Loop bounds for a TeamThreadRange over a ROCm team member.
+  ///
+  /// When __HCC_ACCELERATOR__ is defined, iteration starts at the member's
+  /// team_rank() (offset by the range begin) and strides by team_size();
+  /// otherwise the bounds describe the whole range with stride 1.
+  template<typename iType>
+  struct TeamThreadRangeBoundariesStruct<iType,ROCmTeamMember> {
+    typedef iType index_type;
+    const iType start;
+    const iType end;
+    const iType increment;
+    const ROCmTeamMember& thread;
+
+    /// Bounds for a range that ends at `count` and begins at 0.
+    KOKKOS_INLINE_FUNCTION
+    TeamThreadRangeBoundariesStruct (const ROCmTeamMember& member, const iType& count)
+#if defined( __HCC_ACCELERATOR__ )
+      : start( member.team_rank() )
+      , end( count )
+      , increment( member.team_size() )
+#else
+      : start( 0 )
+      , end( count )
+      , increment( 1 )
+#endif
+      , thread( member )
+    {}
+
+    /// Bounds for the range from `first` up to (not including) `last`.
+    KOKKOS_INLINE_FUNCTION
+    TeamThreadRangeBoundariesStruct (const ROCmTeamMember& member,  const iType& first, const iType& last)
+#if defined( __HCC_ACCELERATOR__ )
+      : start( first + member.team_rank() )
+      , end( last )
+      , increment( member.team_size() )
+#else
+      : start( first )
+      , end( last )
+      , increment( 1 )
+#endif
+      , thread( member )
+    {}
+  };
+  /// Loop bounds for a ThreadVectorRange over a ROCm team member.
+  ///
+  /// When __HCC_ACCELERATOR__ is defined, iteration starts at
+  /// lindex() % vector_length() and strides by vector_length(); otherwise
+  /// the bounds describe the whole range ending at `count` with stride 1.
+  template<typename iType>
+  struct ThreadVectorRangeBoundariesStruct<iType,ROCmTeamMember> {
+    typedef iType index_type;
+    const iType start;
+    const iType end;
+    const iType increment;
+    const ROCmTeamMember& thread;
+
+    // There is deliberately no count-only constructor: `thread` is a
+    // reference member, so every constructor must bind it, and a constructor
+    // without a team member argument cannot do so.
+    KOKKOS_INLINE_FUNCTION
+    ThreadVectorRangeBoundariesStruct (const ROCmTeamMember& thread_, const iType& count):
+#if defined( __HCC_ACCELERATOR__ )
+      start( thread_.lindex()%thread_.vector_length() ),
+      end( count ),
+      increment( thread_.vector_length() ),
+#else
+      start( 0 ),
+      end( count ),
+      increment( 1 ),
+#endif
+      thread(thread_)
+    {}
+  };
+
+}
+}
+
+namespace Kokkos {
+
+/// Wraps `thread` and `count` into the team-thread loop bounds object.
+template<typename iType>
+KOKKOS_INLINE_FUNCTION
+Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>
+  TeamThreadRange(const Impl::ROCmTeamMember& thread, const iType& count) {
+  typedef Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember> bounds_type;
+  return bounds_type(thread, count);
+}
+
+/// Wraps `thread`, `begin` and `end` into the team-thread loop bounds object;
+/// the index type is the common type of the two bound types.
+template<typename iType1,typename iType2>
+KOKKOS_INLINE_FUNCTION
+Impl::TeamThreadRangeBoundariesStruct<typename std::common_type< iType1, iType2 >::type,Impl::ROCmTeamMember>
+  TeamThreadRange(const Impl::ROCmTeamMember& thread, const iType1& begin, const iType2& end) {
+  using index_t  = typename std::common_type< iType1, iType2 >::type;
+  using bounds_t = Impl::TeamThreadRangeBoundariesStruct<index_t,Impl::ROCmTeamMember>;
+  return bounds_t(thread, begin, end);
+}
+
+/// Wraps `thread` and `count` into the thread-vector loop bounds object.
+template<typename iType>
+KOKKOS_INLINE_FUNCTION
+Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >
+  ThreadVectorRange(const Impl::ROCmTeamMember& thread, const iType& count) {
+  typedef Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember > bounds_type;
+  return bounds_type(thread, count);
+}
+
+/// Tag object selecting the per-team overloads of single().
+KOKKOS_INLINE_FUNCTION
+Impl::ThreadSingleStruct<Impl::ROCmTeamMember> PerTeam(const Impl::ROCmTeamMember& thread) {
+  typedef Impl::ThreadSingleStruct<Impl::ROCmTeamMember> tag_type;
+  return tag_type(thread);
+}
+
+/// Tag object selecting the per-thread overloads of single().
+KOKKOS_INLINE_FUNCTION
+Impl::VectorSingleStruct<Impl::ROCmTeamMember> PerThread(const Impl::ROCmTeamMember& thread) {
+  typedef Impl::VectorSingleStruct<Impl::ROCmTeamMember> tag_type;
+  return tag_type(thread);
+}
+
+/// Runs lambda() only on the work item whose vector_rank() is 0.
+template<class FunctorType>
+KOKKOS_INLINE_FUNCTION
+void single(const Impl::VectorSingleStruct<Impl::ROCmTeamMember>& single_struct, const FunctorType& lambda) {
+  const bool selected = ( single_struct.team_member.vector_rank() == 0 );
+  if( selected ) {
+    lambda();
+  }
+}
+
+/// Runs lambda() only on the work item whose lindex() is 0.
+template<class FunctorType>
+KOKKOS_INLINE_FUNCTION
+void single(const Impl::ThreadSingleStruct<Impl::ROCmTeamMember>& single_struct, const FunctorType& lambda) {
+  const bool selected = ( single_struct.team_member.lindex() == 0 );
+  if( selected ) {
+    lambda();
+  }
+}
+
+/// Per-thread single() with a value: calls lambda(val).
+///
+/// With ROCM15 defined only the work item with vector_rank() 0 runs the
+/// lambda, and `val` is then replaced by shfl(val, 0, vector_length()).
+/// Without it, every caller runs the lambda.
+template<class FunctorType, class ValueType>
+KOKKOS_INLINE_FUNCTION
+void single(const Impl::VectorSingleStruct<Impl::ROCmTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
+#if defined(ROCM15)
+  // ROCm 1.5 requires the stricter choice of which work unit runs.
+  if( single_struct.team_member.vector_rank()==0 ) {
+    lambda(val);
+  }
+  val = shfl(val,0,single_struct.team_member.vector_length());
+#else
+  // Older compilers work with the unrestricted call
+  // (TestTeamVector::Test< Kokkos::Experimental::ROCm >(4)).
+  lambda(val);
+#endif
+}
+
+/// Per-team single() with a value: the work item whose lindex() is 0 runs
+/// lambda(val); afterwards team_broadcast(val, 0) is called by every caller.
+template<class FunctorType, class ValueType>
+KOKKOS_INLINE_FUNCTION
+void single(const Impl::ThreadSingleStruct<Impl::ROCmTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
+  const bool selected = ( single_struct.team_member.lindex() == 0 );
+  if( selected ) {
+    lambda(val);
+  }
+  single_struct.team_member.team_broadcast(val,0);
+}
+
+}
+
+namespace Kokkos {
+
+  /** \brief  Inter-thread parallel_for. Calls lambda(iType i) for each index assigned to the caller.
+   *
+   * The caller walks from loop_boundaries.start up to (not including) loop_boundaries.end in steps of
+   * loop_boundaries.increment; together the threads of the calling team cover i=0..N-1.
+   * This functionality requires C++11 support.*/
+template<typename iType, class Lambda>
+KOKKOS_INLINE_FUNCTION
+void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries, const Lambda& lambda) {
+  iType i = loop_boundaries.start;
+  while( i < loop_boundaries.end ) {
+    lambda(i);
+    i+=loop_boundaries.increment;
+  }
+}
+
+/** \brief  Inter-thread team range parallel_reduce (sum). Calls lambda(iType i, ValueType & val)
+ *          for each index assigned to the caller.
+ *
+ * Each contribution is accumulated with += into result, which is first reset to ValueType(); the
+ * per-thread sums are then combined with team_reduce using Impl::JoinAdd and stored in result.
+ * This functionality requires C++11 support.*/
+template< typename iType, class Lambda, typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries,
+                     const Lambda & lambda, ValueType& result) {
+
+  result = ValueType();
+
+  iType i = loop_boundaries.start;
+  while( i < loop_boundaries.end ) {
+    // Every iteration starts from a fresh value-initialized contribution.
+    ValueType contribution = ValueType();
+    lambda(i,contribution);
+    result+=contribution;
+    i+=loop_boundaries.increment;
+  }
+
+  result = loop_boundaries.thread.team_reduce(result,
+                                              Impl::JoinAdd<ValueType>());
+}
+
+/** \brief  Inter-thread team range parallel_reduce with a user-supplied join. Executes
+ *          lambda(iType i, ValueType & val) for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all threads of the calling thread team. The per-thread values are
+ * combined with join(ValueType& val, const ValueType& update) through team_reduce and put into result.
+ * The input value of result seeds each thread's accumulation: when ROCM15 is defined the lambda
+ * accumulates into a copy of result; otherwise each iteration starts from ValueType() and is joined
+ * into result. Therefore
+ * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
+ * '1 for *'). This functionality requires C++11 support.*/
+template< typename iType, class Lambda, typename ValueType, class JoinType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries,
+                     const Lambda & lambda, const JoinType& join, ValueType& result) {
+
+#if defined(ROCM15)
+  ValueType tmp = result;
+  //  Simpler code works with ROCM1.5: the lambda accumulates straight into tmp
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i,tmp);
+  }
+  result = loop_boundaries.thread.team_reduce(tmp,join);
+#else
+  // this workaround freezes up with ROCM1.5, but is needed for earlier compilers
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    ValueType tmp = ValueType();
+    lambda(i,tmp);
+    join(result,tmp);
+  }
+  result = loop_boundaries.thread.team_reduce(result,join);
+#endif
+//  Impl::rocm_intra_workgroup_reduction( loop_boundaries.thread, result,join);
+//  Impl::rocm_inter_workgroup_reduction( loop_boundaries.thread, result,join);
+}
+
+} //namespace Kokkos
+
+
+namespace Kokkos {
+/** \brief  Intra-thread vector parallel_for. Calls lambda(iType i) for each index assigned to the caller.
+ *
+ * The caller walks from loop_boundaries.start up to (not including) loop_boundaries.end in steps of
+ * loop_boundaries.increment; together the vector lanes of the calling thread cover i=0..N-1.
+ * This functionality requires C++11 support.*/
+template<typename iType, class Lambda>
+KOKKOS_INLINE_FUNCTION
+void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >&
+    loop_boundaries, const Lambda& lambda) {
+
+  iType i = loop_boundaries.start;
+  while( i < loop_boundaries.end ) {
+    lambda(i);
+    i+=loop_boundaries.increment;
+  }
+}
+
+/** \brief  Intra-thread vector parallel_reduce (sum). Calls lambda(iType i, ValueType & val)
+ *          for each index assigned to the caller.
+ *
+ * Each contribution is accumulated with += into result, which is first reset to ValueType(); the
+ * per-lane sums are then combined with thread_reduce using Impl::JoinAdd and stored in result.
+ * This functionality requires C++11 support.*/
+template< typename iType, class Lambda, typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >&
+      loop_boundaries, const Lambda & lambda, ValueType& result) {
+  result = ValueType();
+
+  iType i = loop_boundaries.start;
+  while( i < loop_boundaries.end ) {
+    // Every iteration starts from a fresh value-initialized contribution.
+    ValueType contribution = ValueType();
+    lambda(i,contribution);
+    result+=contribution;
+    i+=loop_boundaries.increment;
+  }
+
+  result = loop_boundaries.thread.thread_reduce(result,Impl::JoinAdd<ValueType>());
+}
+
+/** \brief  Intra-thread vector parallel_reduce with a user-supplied join. Executes
+ *          lambda(iType i, ValueType & val) for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
+ * val is performed using JoinType(ValueType& val, const ValueType& update) and put into result.
+ * The lambda accumulates directly into result, so the input value of result is the starting value
+ * of every lane's accumulation. Therefore
+ * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
+ * '1 for *'). This functionality requires C++11 support.
+ *
+ * NOTE(review): team_barrier() is called inside the per-lane loop. Lanes whose start differs may run a
+ * different number of iterations when the range is not a multiple of the increment -- confirm that
+ * the barrier tolerates this. */
+template< typename iType, class Lambda, typename ValueType, class JoinType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >&
+      loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& result) {
+
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i,result);  
+    loop_boundaries.thread.team_barrier();
+  }
+  result = loop_boundaries.thread.thread_reduce(result,join); // combine the per-lane values with the caller's join
+}
+
+/** \brief  Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
+ *          for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
+ * Depending on the target execution space the operator might be called twice: once with final=false
+ * and once with final=true. When final==true val contains the prefix sum value. The contribution of this
+ * "i" needs to be added to val no matter whether final==true or not. In a serial execution
+ * (i.e. team_size==1) the operator is only called once with final==true.
+ * This functionality requires C++11 support.
+ *
+ * Two implementations are selected at compile time:
+ *  - __ROCM_ARCH__ >= 800: each pass over VectorLength indices calls the lambda with final=false,
+ *    builds a prefix sum across the lanes with shfl_up at offsets 1,2,4,8,16,32, converts it to an
+ *    exclusive value offset by the running total scan_val, and calls the lambda again with final=true.
+ *  - otherwise: the lambda is called once per index with final=true on a running scan_val, followed
+ *    by a call to team_scan.
+ *
+ * NOTE(review): scan_val is a local variable; the value it holds at the end (including the team_scan
+ * result in the second implementation) is not returned to the caller -- confirm this is intended. */
+template< typename iType, class FunctorType >
+KOKKOS_INLINE_FUNCTION
+void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >&
+      loop_boundaries, const FunctorType & lambda) {
+
+  typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
+  typedef typename ValueTraits::value_type value_type ;
+
+  value_type scan_val = value_type(); // running total carried from one pass to the next
+#if (__ROCM_ARCH__ >= 800)
+// adopt the cuda vector shuffle method
+  const int VectorLength = loop_boundaries.increment;
+  int lid = loop_boundaries.thread.lindex();
+  int vector_rank = lid%VectorLength;
+
+  iType loop_bound = ((loop_boundaries.end+VectorLength-1)/VectorLength) * VectorLength; // end rounded up to a multiple of VectorLength so every lane runs the same number of passes
+  value_type val ;
+  for(int _i = vector_rank; _i < loop_bound; _i += VectorLength) {
+    val = value_type();
+    if(_i<loop_boundaries.end) // lanes past the real end contribute value_type()
+      lambda(_i , val , false);
+
+    value_type tmp = val;
+    value_type result_i; // assigned below by exactly one of the vector_rank range checks (0, 1, 2-3, 4-7, 8-15, 16-31, >=32)
+
+    if(vector_rank == 0)
+      result_i = tmp;
+    if (VectorLength > 1) {
+      const value_type tmp2 = shfl_up(tmp, 1,VectorLength);
+      if(vector_rank > 0)
+        tmp+=tmp2;
+    }
+    if(vector_rank == 1)
+      result_i = tmp;
+    if (VectorLength > 3) {
+      const value_type tmp2 = shfl_up(tmp, 2,VectorLength);
+      if(vector_rank > 1)
+        tmp+=tmp2;
+    }
+    if ((vector_rank >= 2) &&
+        (vector_rank < 4))
+      result_i = tmp;
+    if (VectorLength > 7) {
+      const value_type tmp2 = shfl_up(tmp, 4,VectorLength);
+      if(vector_rank > 3)
+        tmp+=tmp2;
+    }
+    if ((vector_rank >= 4) &&
+        (vector_rank < 8))
+      result_i = tmp;
+    if (VectorLength > 15) {
+      const value_type tmp2 = shfl_up(tmp, 8,VectorLength);
+      if(vector_rank > 7)
+        tmp+=tmp2;
+    }
+    if ((vector_rank >= 8) &&
+        (vector_rank < 16))
+      result_i = tmp;
+    if (VectorLength > 31) {
+      const value_type tmp2 = shfl_up(tmp, 16,VectorLength);
+      if(vector_rank > 15)
+        tmp+=tmp2;
+    }
+    if ((vector_rank >=16) &&
+        (vector_rank < 32))
+      result_i = tmp;
+    if (VectorLength > 63) {
+      const value_type tmp2 = shfl_up(tmp, 32,VectorLength);
+      if(vector_rank > 31)
+        tmp+=tmp2;
+    }
+
+    if (vector_rank >= 32)
+      result_i = tmp;
+
+    val = scan_val + result_i - val; // running total plus this lane's prefix, minus its own contribution
+    scan_val += shfl(tmp,VectorLength-1,VectorLength); // add the value held by the last lane to the running total
+    if(_i<loop_boundaries.end)
+      lambda(_i , val , true);
+  }
+#else
+// for kaveri, call the LDS based scan routine (the call below is team_scan)
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i,scan_val,true);
+  }
+  scan_val = loop_boundaries.thread.team_scan(scan_val);
+
+#endif
+}
+
+} // namespace Kokkos
+
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0b7a1e2583d1b88953da2524885317662b3cbd99
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp
@@ -0,0 +1,193 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+///////////////////////////////////////////////////////////////////////////////
+// AMP REDUCE
+//////////////////////////////////////////////////////////////////////////////
+
+#if !defined( KOKKOS_ROCM_AMP_REDUCE_INL )
+#define KOKKOS_ROCM_AMP_REDUCE_INL
+
+#include <iostream>
+
+#include <algorithm>
+#include <numeric>
+#include <cmath>
+#include <type_traits>
+#include <ROCm/Kokkos_ROCm_Tile.hpp>
+#include <ROCm/Kokkos_ROCm_Invoke.hpp>
+#include <ROCm/Kokkos_ROCm_Join.hpp>
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace Kokkos {
+namespace Impl {
+
+/// Tag-dispatch overload for std::true_type: hands the state pointer back
+/// unchanged.
+template<class T>
+T* reduce_value(T* state, std::true_type) [[hc]]
+{
+  return state;
+}
+
+/// Tag-dispatch overload for std::false_type: returns a reference to the
+/// object the state pointer refers to.
+template<class T>
+T& reduce_value(T* state, std::false_type) [[hc]]
+{
+  T& value = *state;
+  return value;
+}
+
+#if KOKKOS_ROCM_HAS_WORKAROUNDS
+/// Predicate that accepts any arguments and always answers true.
+struct always_true
+{
+    template<class... Args>
+    bool operator()(Args&&...) const
+    {
+        const bool verdict = true;
+        return verdict;
+    }
+};
+#endif
+
+/// Runs a tiled reduction on the device and finishes it on the host.
+///
+/// Each work item initializes its slot of the tile buffer and invokes the
+/// functor on it; the tile is then combined pairwise with ValueJoin and its
+/// result is written to a per-tile slot of a device array.  After the kernel
+/// has completed, the per-tile partial results are copied to the host, joined
+/// into output_result, and ValueFinal::final is applied.
+///
+/// \param szElements     size of the extent
+/// \param f              user functor
+/// \param reducer        reducer object, or InvalidType to reduce with `f`
+/// \param invoke         callable that invokes the functor for one work item
+/// \param output_result  destination written on the host; must not be null
+/// \param output_length  number of T elements in the result; nothing is done
+///                       when it is less than 1
+/// \param team_size      forwarded to get_tile_desc
+/// \param vector_size    forwarded to get_tile_desc
+/// \param shared_size    forwarded to get_tile_desc
+template< class Tag, class F, class ReducerType, class Invoker, class T >
+void reduce_enqueue(
+  const int szElements,  // size of the extent
+  const F & f,
+  const ReducerType& reducer,
+  Invoker invoke,
+  T * const output_result,
+  int const output_length,
+  const int team_size=64,
+  const int vector_size=1,
+  int const shared_size=0)
+{
+  using namespace hc ;
+
+  typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, F, ReducerType> ReducerConditional;
+  typedef typename ReducerConditional::type ReducerTypeFwd;
+
+  typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , Tag > ValueTraits ;
+  typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , Tag >   ValueInit ;
+  typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , Tag >   ValueJoin ;
+  typedef Kokkos::Impl::FunctorFinal< ReducerTypeFwd , Tag >       ValueFinal ;
+
+  typedef typename ValueTraits::reference_type reference_type ;
+
+  if (output_length < 1) return;
+
+  assert(output_result != nullptr);
+  const auto td = get_tile_desc<T>(szElements,output_length,team_size,vector_size, shared_size);
+
+  // allocate host and device memory for the results from each team
+  std::vector<T> result_cpu(td.num_tiles*output_length);
+  hc::array<T> result(td.num_tiles*output_length);
+
+  auto fut = tile_for<T[]>(td, [=,&result](hc::tiled_index<1> t_idx, tile_buffer<T[]> buffer) [[hc]] 
+  {
+      const auto local = t_idx.local[0];
+      const auto tile = t_idx.tile[0];
+
+      // Initialize this work item's slot and run the functor on it.
+      buffer.action_at(local, [&](T* state)
+      {
+          ValueInit::init(ReducerConditional::select(f, reducer), state);
+          invoke(make_rocm_invoke_fn<Tag>(f), t_idx, td, reduce_value(state, std::is_pointer<reference_type>()));
+      });
+      t_idx.barrier.wait();
+
+      // Reduce within a tile using multiple threads.
+// even though buffer.size is always 64, the value 64 must be hard coded below
+// due to a compiler bug
+//      for(std::size_t s = 1; s < buffer.size(); s *= 2)
+      for(std::size_t s = 1; s < 64; s *= 2)
+      {
+          const std::size_t index = 2 * s * local;
+//          if (index < buffer.size())
+          if (index < 64)
+          {
+              buffer.action_at(index, index + s, [&](T* x, T* y)
+              {
+                  ValueJoin::join(ReducerConditional::select(f, reducer), x, y);
+              });
+          }
+          t_idx.barrier.wait();
+      }
+
+      // Store the tile result in the global memory.
+      if (local == 0)
+      {
+#if KOKKOS_ROCM_HAS_WORKAROUNDS
+          // Workaround for assigning from LDS memory: std::copy should work
+          // directly
+          buffer.action_at(0, [&](T* x)
+          {
+#if ROCM15
+// new ROCM 15 address space changes aren't implemented in std algorithms yet
+              auto * src = reinterpret_cast<char *>(x);
+              auto * dest = reinterpret_cast<char *>(result.data()+tile*output_length);
+              // Copy all output_length elements, not just the first T, so
+              // this branch moves the same data as the copy_if branch below.
+              const std::size_t nbytes = sizeof(T) * static_cast<std::size_t>(output_length);
+              for(std::size_t i=0; i<nbytes; i++) dest[i] = src[i];
+#else
+              // Workaround: copy_if used to avoid memmove
+              std::copy_if(x, x+output_length, result.data()+tile*output_length, always_true{} );
+#endif
+          });
+#else
+          std::copy(buffer, buffer+output_length, result.data()+tile*output_length);
+
+#endif
+      }
+      
+  });
+  // Prepare the final result while the kernel is in flight, then wait for it.
+  ValueInit::init(ReducerConditional::select(f, reducer), output_result);
+  fut.wait();
+
+  // Fold the per-tile partial results into the final result on the host.
+  copy(result,result_cpu.data());
+  for(std::size_t i=0;i<td.num_tiles;i++)
+    ValueJoin::join(ReducerConditional::select(f, reducer), output_result, result_cpu.data()+i*output_length);
+
+  ValueFinal::final( ReducerConditional::select(f, reducer) , output_result );
+
+}
+
+}} //end of namespace Kokkos::Impl
+
+#endif /* #if !defined( KOKKOS_ROCM_AMP_REDUCE_INL ) */
+
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..309b38d69ffcd221f18707ccd42e8a897f61c5f9
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp
@@ -0,0 +1,605 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_ROCM_REDUCESCAN_HPP
+#define KOKKOS_ROCM_REDUCESCAN_HPP
+
+#include <Kokkos_Macros.hpp>
+
+/* only compile this file if ROCM is enabled for Kokkos */
+#if defined( __HCC__ ) && defined( KOKKOS_ENABLE_ROCM )
+
+//#include <utility>
+
+#include <Kokkos_Parallel.hpp>
+#include <impl/Kokkos_FunctorAdapter.hpp>
+#include <impl/Kokkos_Error.hpp>
+#include <ROCm/Kokkos_ROCm_Vectorization.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+//----------------------------------------------------------------------------
+
+template< typename T >  // overload for T exactly the size of one int
+KOKKOS_INLINE_FUNCTION
+void rocm_shfl( T & out , T const & in , int lane ,
+  typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width )
+{
+  const int in_word = *reinterpret_cast<int const *>(&in);  // T's bits viewed as one int
+  *reinterpret_cast<int*>(&out) = __shfl( in_word , lane , width );
+}
+
+template< typename T >  // overload for T spanning a whole number (>1) of ints
+KOKKOS_INLINE_FUNCTION
+void rocm_shfl( T & out , T const & in , int lane ,
+  typename std::enable_if
+    < ( sizeof(int) < sizeof(T) ) && ( 0 == ( sizeof(T) % sizeof(int) ) )
+    , int >::type width )
+{
+  constexpr int word_count = sizeof(T) / sizeof(int);
+  int const * const src = reinterpret_cast<int const *>(&in);
+  int       * const dst = reinterpret_cast<int*>(&out);
+  for ( int w = 0 ; w < word_count ; ++w ) {
+    dst[w] = __shfl( src[w] , lane , width );  // one int-sized word per __shfl call
+  }
+}
+
+//----------------------------------------------------------------------------
+
+template< typename T >  // overload for T exactly the size of one int
+KOKKOS_INLINE_FUNCTION
+void rocm_shfl_down( T & out , T const & in , int delta ,
+  typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width )
+{
+  const int in_word = *reinterpret_cast<int const *>(&in);  // T's bits viewed as one int
+  *reinterpret_cast<int*>(&out) = __shfl_down( in_word , delta , width );
+}
+
+template< typename T >  // overload for T spanning a whole number (>1) of ints
+KOKKOS_INLINE_FUNCTION
+void rocm_shfl_down( T & out , T const & in , int delta ,
+  typename std::enable_if
+    < ( sizeof(int) < sizeof(T) ) && ( 0 == ( sizeof(T) % sizeof(int) ) )
+    , int >::type width )
+{
+  constexpr int word_count = sizeof(T) / sizeof(int);
+  int const * const src = reinterpret_cast<int const *>(&in);
+  int       * const dst = reinterpret_cast<int*>(&out);
+  for ( int w = 0 ; w < word_count ; ++w ) {
+    dst[w] = __shfl_down( src[w] , delta , width );  // one int-sized word per call
+  }
+}
+
+//----------------------------------------------------------------------------
+
+template< typename T >  // overload for T exactly the size of one int
+KOKKOS_INLINE_FUNCTION
+void rocm_shfl_up( T & out , T const & in , int delta ,
+  typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width )
+{
+  const int in_word = *reinterpret_cast<int const *>(&in);  // T's bits viewed as one int
+  *reinterpret_cast<int*>(&out) = __shfl_up( in_word , delta , width );
+}
+
+template< typename T >  // overload for T spanning a whole number (>1) of ints
+KOKKOS_INLINE_FUNCTION
+void rocm_shfl_up( T & out , T const & in , int delta ,
+  typename std::enable_if
+    < ( sizeof(int) < sizeof(T) ) && ( 0 == ( sizeof(T) % sizeof(int) ) )
+    , int >::type width )
+{
+  constexpr int word_count = sizeof(T) / sizeof(int);
+  int const * const src = reinterpret_cast<int const *>(&in);
+  int       * const dst = reinterpret_cast<int*>(&out);
+  for ( int w = 0 ; w < word_count ; ++w ) {
+    dst[w] = __shfl_up( src[w] , delta , width );  // one int-sized word per call
+  }
+}
+#if 0
+//----------------------------------------------------------------------------
+/** \brief  Reduce within a workgroup over team.vector_length(), the "vector" dimension.
+ *  NOTE(review): this code sits inside `#if 0`; `team` is neither a parameter nor a local here.
+ *  This will be called within a nested, intra-team parallel operation.
+ *  Use shuffle operations to avoid conflicts with shared memory usage.
+ *
+ *  Requires:
+ *    team.vector_length() is power of 2
+ *    team.vector_length() <= 32 (one workgroup)
+ *
+ *  Cannot use "butterfly" pattern because floating point
+ *  addition is non-associative.  Therefore, must broadcast
+ *  the final result.
+ */
+template< class Reducer >
+KOKKOS_INLINE_FUNCTION
+void rocm_intra_workgroup_vector_reduce( Reducer const & reducer )
+{
+  static_assert(
+    std::is_reference< typename Reducer::reference_type >::value , "" );
+  // A vector length of 1 leaves the reducer's value untouched.
+  if ( 1 < team.vector_length() ) {
+
+    typename Reducer::value_type tmp ;
+    // i starts at half the vector length and halves each pass until it is 0.
+    for ( int i = team.vector_length() ; ( i >>= 1 ) ; ) {
+      // rocm_shfl_down writes its result into tmp (first argument is the output).
+      rocm_shfl_down( tmp , reducer.reference() , i , team.vector_length() );
+      // Only lanes with vector_rank() < i fold tmp into their own value.
+      if ( team.vector_rank() < i ) { reducer.join( reducer.data() , & tmp ); }
+    }
+
+    // Broadcast from root "lane" to all other "lanes"
+
+    rocm_shfl( reducer.reference() , reducer.reference() , 0 , team.vector_length() );
+  }
+}
+
+/** \brief  Inclusive scan over team.vector_length(), the "vector" dimension.
+ *  NOTE(review): this code sits inside `#if 0`; `team` is neither a parameter nor a local here.
+ *  This will be called within a nested, intra-team parallel operation.
+ *  Use shuffle operations to avoid conflicts with shared memory usage.
+ *
+ *  Algorithm is concurrent bottom-up reductions in triangular pattern
+ *  where each ROCM thread is the root of a reduction tree from the
+ *  zeroth ROCM thread to itself.
+ *
+ *  Requires:
+ *    team.vector_length() is power of 2
+ *    team.vector_length() <= 32 (one workgroup)
+ */
+template< typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void rocm_intra_workgroup_vector_inclusive_scan( ValueType & local )
+{
+  ValueType tmp ;
+
+  // Bottom up:
+  //   [t] += [t-1] if t >= 1
+  //   [t] += [t-2] if t >= 2
+  //   [t] += [t-4] if t >= 4
+  // ...
+  // `local` is updated in place; the combine step is operator+=.
+  for ( int i = 1 ; i < team.vector_length() ; i <<= 1 ) {
+    // rocm_shfl_up writes its result into tmp (first argument is the output).
+    rocm_shfl_up( tmp , local , i , team.vector_length() );
+    // Only lanes with vector_rank() >= i accumulate.
+    if ( i <= team.vector_rank() ) { local += tmp ; }
+  }
+}
+#endif
+
+//----------------------------------------------------------------------------
+/*
+ *  Algorithmic constraints:
+ *   (a) threads with same team.team_rank() have same value
+ *   (b) team.vector_length() == power of two
+ *   (c) blockDim.z == 1
+ */
+
+template< class ValueType , class JoinOp>
+KOKKOS_INLINE_FUNCTION
+void rocm_intra_workgroup_reduction( const ROCmTeamMember& team,
+                                       ValueType& result,
+                                       const JoinOp& join) {
+  // Number of team ranks taking part; ranks at or beyond it contribute nothing.
+  const int active_ranks = team.team_size();
+
+  // Reduce over values held by different team.team_rank(): at each step a rank
+  // fetches the value `stride` ranks above it, with the stride doubling.
+  for ( unsigned int stride = 1 ; team.vector_length() * stride < 32 ; stride *= 2 ) {
+    const ValueType upper = shfl_down(result, team.vector_length()*stride,32u);
+    // Join only when the upper rank is active, so team.team_size() need not be
+    // a power of two.
+    if(team.team_rank() + stride < active_ranks) { join(result , upper); }
+  }
+
+  // Every lane takes the value selected by shfl with source lane 0.
+  result = shfl(result,0,32);
+}
+
+template< class ValueType , class JoinOp>
+KOKKOS_INLINE_FUNCTION
+void rocm_inter_workgroup_reduction( const ROCmTeamMember& team,
+                                       ValueType& value,
+                                       const JoinOp& join) {
+  // Combine the callers' values through tile_static slots; `value` gets the joined result.
+  // Scoped constant instead of a #define, so the name no longer leaks to includers.
+  enum : int { STEP_WIDTH = 4 };
+  tile_static ValueType sh_result[256];  // NOTE(review): only slots [0,STEP_WIDTH) are indexed below
+  int max_active_thread = team.team_size();
+  ValueType* result = (ValueType*) & sh_result;
+  const unsigned step = 256 / team.vector_length();
+  unsigned shift = STEP_WIDTH;
+  // Ranks that are multiples of `step` get slot id rank/step; every other rank
+  // gets 65000, a sentinel meant to fail the slot tests below.
+  const int id = team.team_rank()%step==0?team.team_rank()/step:65000;
+  if(id < STEP_WIDTH ) {
+    result[id] = value;
+  }
+  team.team_barrier();
+  // Fold ids STEP_WIDTH and above into the slots, STEP_WIDTH ids per pass.
+  while (shift<=max_active_thread/step) {
+    if(shift<=id && shift+STEP_WIDTH>id && team.vector_rank()==0) {
+      join(result[id%STEP_WIDTH],value);
+    }
+    team.team_barrier();
+    shift+=STEP_WIDTH;
+  }
+  value = result[0];
+  for(int i = 1; (i*step<max_active_thread) && i<STEP_WIDTH; i++)
+    join(value,result[i]);
+}
+
+#if 0
+template< class ValueType , class JoinOp>   // NOTE(review): inside `#if 0`, not compiled
+KOKKOS_INLINE_FUNCTION
+void rocm_intra_block_reduction( ROCmTeamMember& team,
+                                        ValueType& value,
+                                        const JoinOp& join,
+                                        const int max_active_thread) {
+  rocm_intra_workgroup_reduction(team,value,join,max_active_thread);   // first stage; called with 4 arguments
+  rocm_inter_workgroup_reduction(team,value,join,max_active_thread);   // second stage; NOTE(review): the enabled definitions of both take 3 parameters
+}
+
+template< class FunctorType , class JoinOp , class ArgTag = void >   // NOTE(review): inside `#if 0`, not compiled
+KOKKOS_INLINE_FUNCTION
+bool rocm_inter_block_reduction( ROCmTeamMember& team,
+                                 typename FunctorValueTraits< FunctorType , ArgTag >::reference_type  value,
+                                 typename FunctorValueTraits< FunctorType , ArgTag >::reference_type  neutral,
+                                 const JoinOp& join,
+                                 ROCm::size_type * const m_scratch_space,
+                                 typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type const result,   // not referenced in the body
+                                 ROCm::size_type * const m_scratch_flags,
+                                 const int max_active_thread) {
+#ifdef __ROCM_ARCH__
+  typedef typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type pointer_type;
+  typedef typename FunctorValueTraits< FunctorType , ArgTag >::value_type value_type;
+
+  //Do the intra-block reduction with shfl operations and static shared memory
+  rocm_intra_block_reduction(value,join,max_active_thread);   // NOTE(review): `team` is not passed, unlike the definition above
+  // Flat thread id: team_rank * vector_length + vector_rank.
+  const unsigned id = team.team_rank()*team.vector_length() + team.vector_rank();
+
+  //One thread in the block writes block result to global scratch_memory
+  if(id == 0 ) {
+    pointer_type global = ((pointer_type) m_scratch_space) + blockIdx.x;   // NOTE(review): blockIdx is not declared in this file
+    *global = value;
+  }
+
+  //One workgroup of last block performs inter block reduction through loading the block values from global scratch_memory
+  bool last_block = false;
+
+  team.team_barrier();
+  if ( id < 32 ) {
+    ROCm::size_type count;
+
+    //Figure out whether this is the last block
+    if(id == 0)
+      count = Kokkos::atomic_fetch_add(m_scratch_flags,1);
+    count = Kokkos::shfl(count,0,32);
+
+    //Last block does the inter block reduction
+    if( count == gridDim.x - 1) {
+      //set flag back to zero
+      if(id == 0)
+        *m_scratch_flags = 0;
+      last_block = true;
+      value = neutral;
+      // Restart from `neutral` so only the values re-read from scratch contribute.
+      pointer_type const volatile global = (pointer_type) m_scratch_space ;
+
+      //Reduce all global values with splitting work over threads in one workgroup
+      const int step_size = team.vector_length()*team.team_size() < 32 ? team.vector_length()*team.team_size() : 32;
+      for(int i=id; i<gridDim.x; i+=step_size) {
+        value_type tmp = global[i];
+        join(value, tmp);
+      }
+
+      //Perform shfl reductions within the workgroup only join if contribution is valid (allows gridDim.x non power of two and <32)
+      if (team.vector_length()*team.team_size() > 1) {
+        value_type tmp = Kokkos::shfl_down(value, 1,32);
+        if( id + 1 < gridDim.x )
+          join(value, tmp);
+      }
+      if (team.vector_length()*team.team_size() > 2) {
+        value_type tmp = Kokkos::shfl_down(value, 2,32);
+        if( id + 2 < gridDim.x )
+          join(value, tmp);
+      }
+      if (team.vector_length()*team.team_size() > 4) {
+        value_type tmp = Kokkos::shfl_down(value, 4,32);
+        if( id + 4 < gridDim.x )
+          join(value, tmp);
+      }
+      if (team.vector_length()*team.team_size() > 8) {
+        value_type tmp = Kokkos::shfl_down(value, 8,32);
+        if( id + 8 < gridDim.x )
+          join(value, tmp);
+      }
+      if (team.vector_length()*team.team_size() > 16) {
+        value_type tmp = Kokkos::shfl_down(value, 16,32);
+        if( id + 16 < gridDim.x )
+          join(value, tmp);
+      }
+    }
+  }
+
+  //The last block has in its thread=0 the global reduction value through "value"
+  return last_block;
+#else
+  return true;   // without __ROCM_ARCH__ the function does no work and reports true
+#endif
+}
+#endif
+#if 0
+
+//----------------------------------------------------------------------------
+// See section B.17 of the CUDA C Programming Guide Version 3.2
+// for discussion of the
+//   __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
+// function qualifier, which could be used to improve performance.
+//----------------------------------------------------------------------------
+// Maximize shared memory and minimize L1 cache (CUDA runtime call):
+//   cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferShared );
+// For CUDA compute capability 2.0: 48 KB shared and 16 KB L1
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+/*
+ *  Algorithmic constraints:
+ *   (a) team.team_size() is a power of two
+ *   (b) team.team_size() <= 512
+ *   (c) team.vector_length() == blockDim.z == 1
+ */
+
+template< bool DoScan , class FunctorType , class ArgTag >   // NOTE(review): inside `#if 0`; `team` is not declared in this function
+KOKKOS_INLINE_FUNCTION
+void rocm_intra_block_reduce_scan( const FunctorType & functor ,
+                                   const typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type base_data )
+{
+  typedef FunctorValueTraits< FunctorType , ArgTag >  ValueTraits ;
+  typedef FunctorValueJoin<   FunctorType , ArgTag >  ValueJoin ;
+
+  typedef typename ValueTraits::pointer_type  pointer_type ;
+
+  const unsigned value_count   = ValueTraits::value_count( functor );
+  const unsigned BlockSizeMask = team.team_size() - 1 ;
+
+  // Must have power of two thread count
+
+  if ( BlockSizeMask & team.team_size() ) { Kokkos::abort("ROCm::rocm_intra_block_scan requires power-of-two blockDim"); }
+  // BLOCK_REDUCE_STEP: when the low S+1 bits of R are zero, join TD with the entry (1<<S) values below it.
+#define BLOCK_REDUCE_STEP( R , TD , S )  \
+  if ( ! ( R & ((1<<(S+1))-1) ) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); }
+  // BLOCK_SCAN_STEP: when N equals 1<<S, join TD with the entry (1<<S) values below it.
+#define BLOCK_SCAN_STEP( TD , N , S )  \
+  if ( N == (1<<S) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S))); }
+  // rtid_intra: team rank with every BlockSizeMask bit flipped; tdata_intra: this rank's values in base_data.
+  const unsigned     rtid_intra = team.team_rank() ^ BlockSizeMask ;
+  const pointer_type tdata_intra = base_data + value_count * team.team_rank() ;
+
+  { // Intra-workgroup reduction:
+    BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,0)
+    BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,1)
+    BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,2)
+    BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,3)
+    BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,4)
+  }
+
+  team.team_barrier(); // Wait for all workgroups to reduce
+
+  { // Inter-workgroup reduce-scan by a single workgroup to avoid extra synchronizations
+    const unsigned rtid_inter = ( team.team_rank() ^ BlockSizeMask ) << ROCmTraits::WarpIndexShift ;
+    // Only ranks whose shifted index still falls inside the team take part.
+    if ( rtid_inter < team.team_size() ) {
+
+      const pointer_type tdata_inter = base_data + value_count * ( rtid_inter ^ BlockSizeMask );
+
+      if ( (1<<5) < BlockSizeMask ) {                        BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) }
+      if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
+      if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
+      if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
+
+      if ( DoScan ) {
+        // n: lowest set bit of rtid_inter among 32, 64, 128, 256 (0 if none).
+        int n = ( rtid_inter &  32 ) ?  32 : (
+                ( rtid_inter &  64 ) ?  64 : (
+                ( rtid_inter & 128 ) ? 128 : (
+                ( rtid_inter & 256 ) ? 256 : 0 )));
+
+        if ( ! ( rtid_inter + n < team.team_size() ) ) n = 0 ;
+
+        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,8)
+        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,7)
+        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,6)
+        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,5)
+      }
+    }
+  }
+
+  team.team_barrier(); // Wait for inter-workgroup reduce-scan to complete
+
+  if ( DoScan ) {
+    int n = ( rtid_intra &  1 ) ?  1 : (
+            ( rtid_intra &  2 ) ?  2 : (
+            ( rtid_intra &  4 ) ?  4 : (
+            ( rtid_intra &  8 ) ?  8 : (
+            ( rtid_intra & 16 ) ? 16 : 0 ))));
+    // n: lowest set bit of rtid_intra among 1..16; cleared below when rtid_intra + n leaves the team.
+    if ( ! ( rtid_intra + n < team.team_size() ) ) n = 0 ;
+    #ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND
+    BLOCK_SCAN_STEP(tdata_intra,n,4) team.team_barrier();//__threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,3) team.team_barrier();//__threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,2) team.team_barrier();//__threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,1) team.team_barrier();//__threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,0) team.team_barrier();
+    #else
+    BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block();
+    BLOCK_SCAN_STEP(tdata_intra,n,0) __threadfence_block();
+    #endif
+  }
+  // The helper macros are local to this function.
+#undef BLOCK_SCAN_STEP
+#undef BLOCK_REDUCE_STEP
+}
+
+//----------------------------------------------------------------------------
+/**\brief  Input value-per-thread starting at 'shared_data'.
+ *         Reduction value at last thread's location.
+ *  NOTE(review): this code sits inside `#if 0`; `team` is neither a parameter nor a local here.
+ *  If 'DoScan' then write blocks' scan values and block-groups' scan values.
+ *
+ *  Global reduce result is in the last threads' 'shared_data' location.
+ */
+template< bool DoScan , class FunctorType , class ArgTag >
+KOKKOS_INLINE_FUNCTION
+bool rocm_single_inter_block_reduce_scan( const FunctorType     & functor ,
+                                          const ROCm::size_type   block_id ,
+                                          const ROCm::size_type   block_count ,
+                                          ROCm::size_type * const shared_data ,
+                                          ROCm::size_type * const global_data ,
+                                          ROCm::size_type * const global_flags )
+{
+  typedef ROCm::size_type                  size_type ;
+  typedef FunctorValueTraits< FunctorType , ArgTag >  ValueTraits ;
+  typedef FunctorValueJoin<   FunctorType , ArgTag >  ValueJoin ;
+  typedef FunctorValueInit<   FunctorType , ArgTag >  ValueInit ;
+  typedef FunctorValueOps<    FunctorType , ArgTag >  ValueOps ;
+
+  typedef typename ValueTraits::pointer_type    pointer_type ;
+  typedef typename ValueTraits::reference_type  reference_type ;
+  typedef typename ValueTraits::value_type      value_type ;
+
+  // '__ffs' = position of the least significant bit set to 1.
+  // 'team.team_size()' is guaranteed to be a power of two so this
+  // is the integral shift value that can replace an integral divide.
+  const unsigned BlockSizeShift = __ffs( team.team_size() ) - 1 ;
+  const unsigned BlockSizeMask  = team.team_size() - 1 ;
+
+  // Must have power of two thread count
+  if ( BlockSizeMask & team.team_size() ) { Kokkos::abort("ROCm::rocm_single_inter_block_reduce_scan requires power-of-two blockDim"); }
+  // word_count: size of one reduction value measured in size_type words.
+  const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
+    word_count( ValueTraits::value_size( functor ) / sizeof(size_type) );
+
+  // Reduce the accumulation for the entire block.
+  rocm_intra_block_reduce_scan<false,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
+
+  {
+    // Write accumulation total to global scratch space.
+    // Accumulation total is the last thread's data.
+    size_type * const shared = shared_data + word_count.value * BlockSizeMask ;
+    size_type * const global = global_data + word_count.value * block_id ;
+
+#if (__ROCM_ARCH__ < 500)
+    for ( size_type i = team.team_rank() ; i < word_count.value ; i += team.team_size() ) { global[i] = shared[i] ; }
+#else
+    for ( size_type i = 0 ; i < word_count.value ; i += 1 ) { global[i] = shared[i] ; }
+#endif
+
+  }
+
+  // Contributing blocks note that their contribution has been completed via an atomic-increment flag
+  // If this block is not the last block to contribute to this group then the block is done.
+    team.team_barrier();
+  const bool is_last_block =
+    ! team.team_reduce( team.team_rank() ? 0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) ,Impl::JoinAdd<ValueType>());
+  // NOTE(review): `ValueType` above is not declared in this function (only `value_type` is).
+  if ( is_last_block ) {
+    // [b,e): this rank's share of the block_count per-block values, split evenly over the team.
+    const size_type b = ( long(block_count) * long(team.team_rank()) ) >> BlockSizeShift ;
+    const size_type e = ( long(block_count) * long( team.team_rank() + 1 ) ) >> BlockSizeShift ;
+
+    {
+      void * const shared_ptr = shared_data + word_count.value * team.team_rank() ;
+      reference_type shared_value = ValueInit::init( functor , shared_ptr );
+
+      for ( size_type i = b ; i < e ; ++i ) {
+        ValueJoin::join( functor , shared_ptr , global_data + word_count.value * i );
+      }
+    }
+
+    rocm_intra_block_reduce_scan<DoScan,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
+
+    if ( DoScan ) {
+      // Rank r uses rank r-1's slot; rank 0 uses the slot at index team_size() and initializes it below.
+      size_type * const shared_value = shared_data + word_count.value * ( team.team_rank() ? team.team_rank() - 1 : team.team_size() );
+
+      if ( ! team.team_rank() ) { ValueInit::init( functor , shared_value ); }
+
+      // Join previous inclusive scan value to each member
+      for ( size_type i = b ; i < e ; ++i ) {
+        size_type * const global_value = global_data + word_count.value * i ;
+        ValueJoin::join( functor , shared_value , global_value );
+        ValueOps ::copy( functor , global_value , shared_value );
+      }
+    }
+  }
+
+  return is_last_block ;
+}
+
+// Size in bytes required for inter block reduce or scan: (BlockSize + 2) times the functor's value_size.
+template< bool DoScan , class FunctorType , class ArgTag >   // DoScan is not used in the body
+inline
+unsigned rocm_single_inter_block_reduce_scan_shmem( const FunctorType & functor , const unsigned BlockSize )
+{
+  return ( BlockSize + 2 ) * Impl::FunctorValueTraits< FunctorType , ArgTag >::value_size( functor );
+}
+#endif 
+
+} // namespace Impl
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #if defined( __HCC__ ) && defined( KOKKOS_ENABLE_ROCM ) */
+#endif /* KOKKOS_ROCM_REDUCESCAN_HPP */
+
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..acf75f6f1366fbc618e075ded29e52f7aa1a742c
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp
@@ -0,0 +1,157 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <ROCm/Kokkos_ROCm_Invoke.hpp>
+#include <ROCm/Kokkos_ROCm_Join.hpp>
+
+namespace Kokkos {
+namespace Impl {
+
+// Two-pass parallel scan of `len` work items.
+//  Pass 1: each tile evaluates f (final == false) into its tile buffer, scans the
+//          buffer in place (up sweep, then down sweep) and stores the tile total.
+//  Host  : tile totals are combined into running totals with ValueJoin::join.
+//  Pass 2: each item adds the running total of the preceding tiles and calls f
+//          again with final == true.
+// `transform_index` maps (tiled index, tile size, tile count) to the index given to f.
+template< class Tag, class F, class TransformIndex>
+void scan_enqueue(
+  const int len,
+  const F & f,
+  TransformIndex transform_index)
+{
+    typedef Kokkos::Impl::FunctorValueTraits< F, Tag>  ValueTraits;
+    typedef Kokkos::Impl::FunctorValueInit<   F, Tag>  ValueInit;
+    typedef Kokkos::Impl::FunctorValueJoin<   F, Tag>  ValueJoin;
+    typedef typename ValueTraits::value_type    value_type;
+
+    // Nothing to scan; also avoids creating zero-extent device arrays.
+    if (len <= 0) return;
+
+    const auto td = get_tile_desc<value_type>(len);
+    std::vector<value_type> result_cpu(td.num_tiles);
+    hc::array<value_type> result(td.num_tiles);
+    hc::array<value_type> scratch(len);
+
+    tile_for<value_type>(td, [&,len,td](hc::tiled_index<1> t_idx, tile_buffer<value_type> buffer) [[hc]]
+    {
+        const auto local = t_idx.local[0];
+        const auto global = t_idx.global[0];
+        const auto tile = t_idx.tile[0];
+        const std::size_t last = buffer.size()-1;
+
+        // Copy into tile: every slot starts from the functor's initial value and
+        // only in-range items add their contribution.
+        buffer.action_at(local, [&](value_type& state)
+        {
+            ValueInit::init(f, &state);
+            if (global < len) rocm_invoke<Tag>(f, transform_index(t_idx, td.tile_size, td.num_tiles), state, false);
+        });
+        t_idx.barrier.wait();
+
+        // Up sweep phase. The bound is the tile buffer, not `len`: j and k index
+        // the buffer, so comparing against the global length let high-numbered
+        // threads touch slots past its end.
+        for(std::size_t d=1;d<buffer.size();d*=2)
+        {
+            const std::size_t d2 = 2*d;
+            const std::size_t i = local*d2;
+            const std::size_t j = i + d - 1;
+            const std::size_t k = i + d2 - 1;
+            if(k<buffer.size()) ValueJoin::join(f, &buffer[k], &buffer[j]);
+            // Each level reads what other threads wrote in the previous one.
+            t_idx.barrier.wait();
+        }
+
+        // One thread publishes the tile total, then resets the last slot to the functor's
+        // initial value (was `= 0` done by every thread, racing with the other threads' reads).
+        if (local == 0)
+        {
+            result[tile] = buffer[last];
+            ValueInit::init(f, &buffer[last]);
+        }
+
+        // Down sweep phase
+        for(std::size_t d=buffer.size()/2;d>0;d/=2)
+        {
+            const std::size_t d2 = 2*d;
+            const std::size_t i = local*d2;
+            const std::size_t j = i + d - 1;
+            const std::size_t k = i + d2 - 1;
+            if(k<buffer.size())
+            {
+               auto t = buffer[k];
+               ValueJoin::join(f, &buffer[k], &buffer[j]);
+               buffer[j] = t;
+            }
+            t_idx.barrier.wait();
+        }
+        // Copy tiles into global memory
+        if (global < len) scratch[global] = buffer[local];
+    }).wait();
+    copy(result,result_cpu.data());
+
+    // Running totals of the tiles, computed on the host. (std::partial_sum was
+    // observed to segfault here, hence the explicit loop.)
+    for(std::size_t i=1; i<td.num_tiles; i++)
+      ValueJoin::join(f, &result_cpu[i], &result_cpu[i-1]);
+
+    copy(result_cpu.data(),result);
+    hc::parallel_for_each(hc::extent<1>(len).tile(td.tile_size), [&,len,td](hc::tiled_index<1> t_idx) [[hc]]
+    {
+        const auto global = t_idx.global[0];
+        const auto tile = t_idx.tile[0];
+
+        if (global < len)
+        {
+            auto final_state = scratch[global];
+            // operator+= is a stand-in for ValueJoin::join, which was observed to
+            // lock up here with ROCm 1.6, so this step assumes an additive join.
+            if (tile != 0) final_state += result[tile-1];
+            rocm_invoke<Tag>(f, transform_index(t_idx, td.tile_size, td.num_tiles), final_state, true);
+        }
+    }).wait();
+}
+
+} // namespace Impl
+} // namespace Kokkos
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e2b673807641b718b30aaa2fdd76e26a064de3de
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp
@@ -0,0 +1,726 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#include <stdlib.h>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <algorithm>
+#include <atomic>
+#include <Kokkos_Macros.hpp>
+
+/* only compile this file if ROCM is enabled for Kokkos */
+#ifdef KOKKOS_ENABLE_ROCM
+
+#include <Kokkos_Core.hpp>
+#include <Kokkos_ROCm.hpp>
+#include <Kokkos_ROCmSpace.hpp>
+
+#include <impl/Kokkos_Error.hpp>
+
+#if defined(KOKKOS_ENABLE_PROFILING)
+#include <impl/Kokkos_Profiling_Interface.hpp>
+#endif
+
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+#define ROCM_SAFE_CALL
+namespace Kokkos {
+namespace Impl {
+using namespace hc;
+
+DeepCopy<Kokkos::Experimental::ROCmSpace,Kokkos::Experimental::ROCmSpace,Kokkos::Experimental::ROCm>::DeepCopy( void * dst , const void * src , size_t n )
+{
+   // ROCmSpace -> ROCmSpace: copy n bytes from src to dst through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+
+DeepCopy<HostSpace,Kokkos::Experimental::ROCmSpace,Kokkos::Experimental::ROCm>::DeepCopy( void * dst , const void * src , size_t n )
+{
+   // Destination HostSpace, source ROCmSpace: copy n bytes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+DeepCopy<Kokkos::Experimental::ROCmSpace,HostSpace,Kokkos::Experimental::ROCm>::DeepCopy( void * dst , const void * src , size_t n )
+{
+   // Destination ROCmSpace, source HostSpace: copy n bytes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+DeepCopy<Kokkos::Experimental::ROCmSpace,Kokkos::Experimental::ROCmSpace,Kokkos::Experimental::ROCm>::DeepCopy( const Kokkos::Experimental::ROCm & instance , void * dst , const void * src , size_t n )
+{
+   // `instance` is not used: the n-byte copy always goes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+DeepCopy<HostSpace,Kokkos::Experimental::ROCmSpace,Kokkos::Experimental::ROCm>::DeepCopy( const Kokkos::Experimental::ROCm & instance , void * dst , const void * src , size_t n )
+{
+   // `instance` is not used: the n-byte copy always goes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+DeepCopy<Kokkos::Experimental::ROCmSpace,HostSpace,Kokkos::Experimental::ROCm>::DeepCopy( const Kokkos::Experimental::ROCm & instance , void * dst , const void * src , size_t n )
+{
+   // `instance` is not used: the n-byte copy always goes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+
+
+DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,Kokkos::Experimental::ROCmHostPinnedSpace,Kokkos::Experimental::ROCm>::DeepCopy( void * dst , const void * src , size_t n )
+{
+   // Host-pinned -> host-pinned: copy n bytes from src to dst through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+
+DeepCopy<HostSpace,Kokkos::Experimental::ROCmHostPinnedSpace,Kokkos::Experimental::ROCm>::DeepCopy( void * dst , const void * src , size_t n )
+{
+   // Destination HostSpace, source host-pinned: copy n bytes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,HostSpace,Kokkos::Experimental::ROCm>::DeepCopy( void * dst , const void * src , size_t n )
+{
+   // Destination host-pinned, source HostSpace: copy n bytes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,Kokkos::Experimental::ROCmHostPinnedSpace,Kokkos::Experimental::ROCm>::DeepCopy( const Kokkos::Experimental::ROCm & instance , void * dst , const void * src , size_t n )
+{
+   // `instance` is not used: the n-byte copy always goes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+DeepCopy<HostSpace,Kokkos::Experimental::ROCmHostPinnedSpace,Kokkos::Experimental::ROCm>::DeepCopy( const Kokkos::Experimental::ROCm & instance , void * dst , const void * src , size_t n )
+{
+   // `instance` is not used: the n-byte copy always goes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,HostSpace,Kokkos::Experimental::ROCm>::DeepCopy( const Kokkos::Experimental::ROCm & instance , void * dst , const void * src , size_t n )
+{
+   // `instance` is not used: the n-byte copy always goes through the default accelerator view.
+   hc::accelerator device;
+   device.get_default_view().copy( src , dst , n );
+}
+
+
+hc::completion_future DeepCopyAsyncROCm( void * dst , const void * src , size_t n) {
+   // Issue copy_async for n bytes from src to dst on the default accelerator view
+   // and hand the resulting completion_future straight back to the caller.
+   hc::accelerator device;
+   return device.get_default_view().copy_async( src , dst , n );
+}
+
+} // namespace Impl
+} // namespace Kokkos
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+
+
+namespace Kokkos {
+
+void Experimental::ROCmSpace::access_error()
+{
+  // Always raises: hands the fixed diagnostic below to throw_runtime_exception.
+  Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::ROCmSpace::access_error attempt to execute Experimental::ROCm function from non-ROCm space") );
+}
+
+void Experimental::ROCmSpace::access_error( const void * const )
+{
+  // Pointer overload: the argument is unnamed and ignored; the same diagnostic is raised.
+  Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::ROCmSpace::access_error attempt to execute Experimental::ROCm function from non-ROCm space") );
+}
+
+} // namespace Kokkos
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+
+namespace Kokkos {
+namespace Experimental {
+
+ROCmSpace::ROCmSpace()
+  : m_device( ROCm().rocm_device() ) // value reported by rocm_device() on a default-constructed ROCm
+{
+}
+
+ROCmHostPinnedSpace::ROCmHostPinnedSpace()
+{ // empty body: no member is initialised explicitly here
+}
+
+void * ROCmSpace::allocate( const size_t arg_alloc_size ) const
+{
+  // Forward the byte count to rocm_device_allocate and return its result unchanged.
+  return Kokkos::Impl::rocm_device_allocate( arg_alloc_size );
+}
+
+void * Experimental::ROCmHostPinnedSpace::allocate( const size_t arg_alloc_size ) const
+{
+  // Forward the byte count to rocm_hostpinned_allocate and return its result unchanged.
+  return Kokkos::Impl::rocm_hostpinned_allocate( arg_alloc_size );
+}
+
+void ROCmSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
+{
+  Kokkos::Impl::rocm_device_free(arg_alloc_ptr); // the size argument is unused; only the pointer is passed on
+}
+
+void Experimental::ROCmHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
+{
+  Kokkos::Impl::rocm_device_free(arg_alloc_ptr); // NOTE(review): allocate() uses rocm_hostpinned_allocate; confirm rocm_device_free also releases pinned host memory
+}
+
+} // namespace Experimental
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+SharedAllocationRecord< void , void >
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::s_root_record ; // list root searched by get_record() and walked by print_records()
+
+SharedAllocationRecord< void , void >
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::s_root_record ; // root handed to the base constructor for host-pinned records
+
+
+std::string
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::get_label() const
+{
+  // The header was written into the ROCmSpace allocation, so stage a host copy before reading the label.
+  SharedAllocationHeader host_header ;
+  Kokkos::Impl::DeepCopy< Kokkos::HostSpace , Kokkos::Experimental::ROCmSpace >
+    ( & host_header , RecordBase::head() , sizeof(SharedAllocationHeader) );
+  return std::string( host_header.m_label );
+}
+
+std::string
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::get_label() const
+{
+  return std::string( RecordBase::head()->m_label ); // header is dereferenced in place; no staging copy as in the ROCmSpace variant
+}
+
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > *
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::
+allocate( const Kokkos::Experimental::ROCmSpace &  arg_space
+        , const std::string       &  arg_label
+        , const size_t               arg_alloc_size
+        )
+{
+  return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size ); // heap-allocated record; deallocate() below releases it with delete
+}
+
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void > *
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::
+allocate( const Kokkos::Experimental::ROCmHostPinnedSpace &  arg_space
+        , const std::string                 &  arg_label
+        , const size_t                         arg_alloc_size
+        )
+{
+  return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size ); // heap-allocated record; deallocate() below releases it with delete
+}
+
+void
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::
+deallocate( SharedAllocationRecord< void , void > * arg_rec )
+{
+  delete static_cast<SharedAllocationRecord*>(arg_rec); // cast to this specialization before delete so its destructor is the one invoked
+}
+
+void
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::
+deallocate( SharedAllocationRecord< void , void > * arg_rec )
+{
+  delete static_cast<SharedAllocationRecord*>(arg_rec); // cast to this specialization before delete so its destructor is the one invoked
+}
+
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::
+~SharedAllocationRecord()
+{
+  #if defined(KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    // Destination is the host-side `header`, source is the ROCmSpace allocation (same order as get_label()).
+    SharedAllocationHeader header ;
+    Kokkos::Impl::DeepCopy<HostSpace,Kokkos::Experimental::ROCmSpace>( & header , RecordBase::m_alloc_ptr , sizeof(SharedAllocationHeader) );
+
+    Kokkos::Profiling::deallocateData(
+      Kokkos::Profiling::SpaceHandle(Kokkos::Experimental::ROCmSpace::name()),header.m_label,
+      data(),size());
+  }
+  #endif
+  // Release the whole allocation (header + user memory) through the owning space.
+  m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
+                    , SharedAllocationRecord< void , void >::m_alloc_size
+                    );
+}
+
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::
+~SharedAllocationRecord()
+{
+  #if defined(KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) { // report the deallocation only when a profiling library is loaded
+    Kokkos::Profiling::deallocateData(
+      Kokkos::Profiling::SpaceHandle(Kokkos::Experimental::ROCmHostPinnedSpace::name()),RecordBase::m_alloc_ptr->m_label, // label is read in place, without a staging copy
+      data(),size());
+  }
+  #endif
+  // Release the whole allocation (header + user memory) through the owning space.
+  m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
+                    , SharedAllocationRecord< void , void >::m_alloc_size
+                    );
+}
+
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::
+SharedAllocationRecord( const Kokkos::Experimental::ROCmSpace & arg_space
+                      , const std::string       & arg_label
+                      , const size_t              arg_alloc_size
+                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
+                      )
+  // Pass through allocated [ SharedAllocationHeader , user_memory ]
+  // Pass through deallocation function
+  : SharedAllocationRecord< void , void >
+      ( & SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::s_root_record
+      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
+      , sizeof(SharedAllocationHeader) + arg_alloc_size
+      , arg_dealloc
+      )
+  , m_space( arg_space )
+{
+  #if defined(KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
+  }
+  #endif
+  // Build the header on the host, then copy it into the ROCmSpace allocation.
+  SharedAllocationHeader header ;
+
+  // Fill in the Header information
+  header.m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
+
+  // strncpy leaves the buffer unterminated when the label fills it, so force the last
+  // element to zero: get_label() and print_records() read m_label as a C string.
+  strncpy( header.m_label , arg_label.c_str() , SharedAllocationHeader::maximum_label_length );
+  header.m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;
+
+  // Copy to device memory
+  Kokkos::Impl::DeepCopy<Kokkos::Experimental::ROCmSpace,HostSpace>( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
+}
+
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::
+SharedAllocationRecord( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_space
+                      , const std::string                 & arg_label
+                      , const size_t                        arg_alloc_size
+                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
+                      )
+  // Pass through allocated [ SharedAllocationHeader , user_memory ]
+  // Pass through deallocation function
+  : SharedAllocationRecord< void , void >
+      ( & SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::s_root_record
+      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
+      , sizeof(SharedAllocationHeader) + arg_alloc_size
+      , arg_dealloc
+      )
+  , m_space( arg_space )
+{
+  #if defined(KOKKOS_ENABLE_PROFILING)
+  if(Kokkos::Profiling::profileLibraryLoaded()) {
+    Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
+  }
+  #endif
+  // Fill in the Header information, directly accessible via host pinned memory
+
+  RecordBase::m_alloc_ptr->m_record = this ;
+
+  // strncpy leaves the buffer unterminated when the label fills it, so force the last
+  // element to zero: get_label() reads m_label as a C string.
+  strncpy( RecordBase::m_alloc_ptr->m_label , arg_label.c_str() , SharedAllocationHeader::maximum_label_length );
+  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;
+}
+
+//----------------------------------------------------------------------------
+
+void * SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::
+allocate_tracked( const Kokkos::Experimental::ROCmSpace & arg_space
+                , const std::string & arg_alloc_label
+                , const size_t arg_alloc_size )
+{
+  // A zero-byte request creates no record and yields a null pointer.
+  if ( arg_alloc_size == 0 ) { return (void *) 0 ; }
+
+  // Create the record, take a reference on it, and return what its data() reports.
+  SharedAllocationRecord * const record = allocate( arg_space , arg_alloc_label , arg_alloc_size );
+  RecordBase::increment( record );
+
+  return record->data();
+}
+
+void SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::
+deallocate_tracked( void * const arg_alloc_ptr )
+{
+  // Null pointers are ignored.
+  if ( arg_alloc_ptr == 0 ) { return ; }
+
+  // Look the record up from the user pointer and drop one reference on it.
+  RecordBase::decrement( get_record( arg_alloc_ptr ) );
+}
+
+void * SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::
+reallocate_tracked( void * const arg_alloc_ptr
+                  , const size_t arg_alloc_size )
+{
+  // The replacement record reuses the old record's space and label.
+  SharedAllocationRecord * const old_rec = get_record( arg_alloc_ptr );
+  SharedAllocationRecord * const new_rec = allocate( old_rec->m_space , old_rec->get_label() , arg_alloc_size );
+  // Carry over as many bytes as the smaller of the two allocations holds.
+  const auto copy_bytes = std::min( old_rec->size() , new_rec->size() );
+  Kokkos::Impl::DeepCopy<Kokkos::Experimental::ROCmSpace,Kokkos::Experimental::ROCmSpace>( new_rec->data() , old_rec->data() , copy_bytes );
+
+  RecordBase::increment( new_rec );
+  RecordBase::decrement( old_rec );
+  return new_rec->data();
+}
+
+#if 0
+void * SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::
+allocate_tracked( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_space
+                , const std::string & arg_alloc_label
+                , const size_t arg_alloc_size )
+{
+  if ( ! arg_alloc_size ) return (void *) 0 ;
+
+  SharedAllocationRecord * const r =
+    allocate( arg_space , arg_alloc_label , arg_alloc_size );
+
+  RecordBase::increment( r );
+
+  return r->data();
+}
+
+void SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::
+deallocate_tracked( void * const arg_alloc_ptr )
+{
+  if ( arg_alloc_ptr != 0 ) {
+    SharedAllocationRecord * const r = get_record( arg_alloc_ptr );
+
+    RecordBase::decrement( r );
+  }
+}
+
+void * SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::
+reallocate_tracked( void * const arg_alloc_ptr
+                  , const size_t arg_alloc_size )
+{
+  SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr );
+  SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size );
+
+  Kokkos::Impl::DeepCopy<Experimental::ROCmHostPinnedSpace,Experimental::ROCmHostPinnedSpace>( r_new->data() , r_old->data()
+                                             , std::min( r_old->size() , r_new->size() ) );
+
+  RecordBase::increment( r_new );
+  RecordBase::decrement( r_old );
+
+  return r_new->data();
+}
+#endif
+
+//----------------------------------------------------------------------------
+
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > *
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::get_record( void * alloc_ptr )
+{
+  using Header     = SharedAllocationHeader ;
+  using RecordBase = SharedAllocationRecord< void , void > ;
+  using RecordROCm = SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > ;
+  // Map a user pointer back to its record; raises via throw_runtime_exception when no record is found.
+#if 0
+  // Disabled variant: copy the header from the allocation to the host and read the record pointer from it.
+  Header head ;
+
+  Header const * const head_rocm = alloc_ptr ? Header::get_header( alloc_ptr ) : (Header*) 0 ;
+
+  if ( alloc_ptr ) {
+    Kokkos::Impl::DeepCopy<HostSpace,Experimental::ROCmSpace>( & head , head_rocm , sizeof(SharedAllocationHeader) );
+  }
+
+  RecordROCm * const record = alloc_ptr ? static_cast< RecordROCm * >( head.m_record ) : (RecordROCm *) 0 ;
+
+  if ( ! alloc_ptr || record->m_alloc_ptr != head_rocm ) {
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::get_record ERROR" ) );
+  }
+
+#else
+  // Active variant (Header is only used by the disabled branch above).
+  // Iterate the list to search for the record among all allocations
+  // requires obtaining the root of the list and then locking the list.
+
+  RecordROCm * const record = static_cast< RecordROCm * >( RecordBase::find( & s_root_record , alloc_ptr ) );
+
+  if ( record == 0 ) {
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::get_record ERROR" ) );
+  }
+
+#endif
+
+  return record ;
+}
+
+#if  0
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void > *
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::get_record( void * alloc_ptr )
+{
+  using Header     = SharedAllocationHeader ;
+  using RecordROCm = SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void > ;
+
+  Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : (Header *) 0 ;
+
+  if ( ! alloc_ptr || h->m_record->m_alloc_ptr != h ) {
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::get_record ERROR" ) );
+  }
+
+  return static_cast< RecordROCm * >( h->m_record );
+}
+#endif 
+
+// Iterate records to print orphaned memory to the caller's stream `s`; `detail` selects the long format.
+void
+SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >::
+print_records( std::ostream & s , const Kokkos::Experimental::ROCmSpace & space , bool detail )
+{
+  SharedAllocationRecord< void , void > * r = & s_root_record ;
+
+  char buffer[256] ;
+  // Host-side staging copy of each record's header (`space` itself is not consulted).
+  SharedAllocationHeader head ;
+
+  if ( detail ) {
+    do {
+      if ( r->m_alloc_ptr ) {
+        Kokkos::Impl::DeepCopy<HostSpace,Kokkos::Experimental::ROCmSpace>( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
+      }
+      else {
+        head.m_label[0] = 0 ;
+      }
+
+      //Formatting dependent on sizeof(uintptr_t)
+      const char * format_string;
+
+      if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+        format_string = "ROCm addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n";
+      }
+      else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
+        format_string = "ROCm addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n";
+      }
+
+      snprintf( buffer , 256
+              , format_string
+              , reinterpret_cast<uintptr_t>( r )
+              , reinterpret_cast<uintptr_t>( r->m_prev )
+              , reinterpret_cast<uintptr_t>( r->m_next )
+              , reinterpret_cast<uintptr_t>( r->m_alloc_ptr )
+              , r->m_alloc_size
+              , r->m_count
+              , reinterpret_cast<uintptr_t>( r->m_dealloc )
+              , head.m_label
+              );
+      s << buffer ; // write to the stream the caller supplied, not unconditionally to std::cout
+      r = r->m_next ;
+    } while ( r != & s_root_record );
+  }
+  else {
+    do {
+      if ( r->m_alloc_ptr ) {
+
+        Kokkos::Impl::DeepCopy<HostSpace,Kokkos::Experimental::ROCmSpace>( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
+
+        //Formatting dependent on sizeof(uintptr_t)
+        const char * format_string;
+
+        if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+          format_string = "ROCm [ 0x%.12lx + %ld ] %s\n";
+        }
+        else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
+          format_string = "ROCm [ 0x%.12llx + %ld ] %s\n";
+        }
+
+        snprintf( buffer , 256
+                , format_string
+                , reinterpret_cast< uintptr_t >( r->data() )
+                , r->size()
+                , head.m_label
+                );
+      }
+      else {
+        snprintf( buffer , 256 , "ROCm [ 0 + 0 ]\n" );
+      }
+      s << buffer ; // write to the stream the caller supplied, not unconditionally to std::cout
+      r = r->m_next ;
+    } while ( r != & s_root_record );
+  }
+}
+#if 0
+void
+SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >::
+print_records( std::ostream & s , const Kokkos::Experimental::ROCmHostPinnedSpace & space , bool detail )
+{
+  SharedAllocationRecord< void , void >::print_host_accessible_records( s , "ROCmHostPinned" , & s_root_record , detail );
+}
+#endif 
+
+} // namespace Impl
+} // namespace Kokkos
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+namespace Kokkos {
+namespace {
+#if 0
+  KOKKOS_INLINE_FUNCTION void init_lock_array_kernel_atomic() {
+    unsigned i = tindex()*team_size() + lindex();
+
+    if(i<ROCM_SPACE_ATOMIC_MASK+1)
+      kokkos_impl_rocm_lock_arrays.atomic[i] = 0;
+  }
+
+  KOKKOS_INLINE_FUNCTION void init_lock_array_kernel_scratch_threadid(int N) {
+    unsigned i = tindex()*team_size() + lindex();
+
+    if(i<N) {
+      kokkos_impl_rocm_lock_arrays.scratch[i] = 0;
+      kokkos_impl_rocm_lock_arrays.threadid[i] = 0;
+    }
+  }
+}
+
+
+namespace Impl {
+int* atomic_lock_array_rocm_space_ptr(bool deallocate) {
+  static int* ptr = NULL;
+  if(deallocate) {
+    rocmFree(ptr);
+    ptr = NULL;
+  }
+
+  if(ptr==NULL && !deallocate)
+    rocmMalloc(&ptr,sizeof(int)*(ROCM_SPACE_ATOMIC_MASK+1));
+  return ptr;
+}
+
+int* scratch_lock_array_rocm_space_ptr(bool deallocate) {
+  static int* ptr = NULL;
+  if(deallocate) {
+    rocmFree(ptr);
+    ptr = NULL;
+  }
+
+  if(ptr==NULL && !deallocate)
+    rocmMalloc(&ptr,sizeof(int)*(ROCm::concurrency()));
+  return ptr;
+}
+
+int* threadid_lock_array_rocm_space_ptr(bool deallocate) {
+  static int* ptr = NULL;
+  if(deallocate) {
+    rocmFree(ptr);
+    ptr = NULL;
+  }
+
+  if(ptr==NULL && !deallocate)
+    rocmMalloc(&ptr,sizeof(int)*(ROCm::concurrency()));
+  return ptr;
+}
+
+void init_lock_arrays_rocm_space() {
+  static int is_initialized = 0;
+  if(! is_initialized) {
+    Kokkos::Impl::ROCmLockArraysStruct locks;
+    locks.atomic = atomic_lock_array_rocm_space_ptr(false);
+    locks.scratch = scratch_lock_array_rocm_space_ptr(false);
+    locks.threadid = threadid_lock_array_rocm_space_ptr(false);
+    am_copyToSymbol( kokkos_impl_rocm_lock_arrays , & locks , sizeof(ROCmLockArraysStruct) );
+    init_lock_array_kernel_atomic<<<(ROCM_SPACE_ATOMIC_MASK+255)/256,256>>>();
+    init_lock_array_kernel_scratch_threadid<<<(Kokkos::Experimental::ROCm::concurrency()+255)/256,256>>>(Kokkos::Experimental::ROCm::concurrency());
+  }
+}
+#endif 
+
+void* rocm_resize_scratch_space(size_t bytes, bool force_shrink) {
+  // Keeps one ROCmSpace scratch buffer across calls and returns it, (re)allocating when the
+  // recorded size is zero, when more bytes are requested, or when a shrink is forced.
+  static void*  scratch  = NULL;
+  static size_t capacity = 0;
+  if(capacity == 0) {                              // nothing recorded yet: allocate
+    capacity = bytes;
+    scratch = Kokkos::kokkos_malloc<Kokkos::Experimental::ROCmSpace>("ROCmSpace::ScratchMemory",capacity);
+  } else if(capacity < bytes) {                    // request exceeds the recorded size: kokkos_realloc
+    capacity = bytes;
+    scratch = Kokkos::kokkos_realloc<Kokkos::Experimental::ROCmSpace>(scratch,capacity);
+  } else if(force_shrink && (bytes < capacity)) {  // shrink only on request: free, then allocate anew
+    capacity = bytes;
+    Kokkos::kokkos_free<Kokkos::Experimental::ROCmSpace>(scratch);
+    scratch = Kokkos::kokkos_malloc<Kokkos::Experimental::ROCmSpace>("ROCmSpace::ScratchMemory",capacity);
+  }
+  return scratch;
+}
+
+}
+}
+
+#endif // KOKKOS_ENABLE_ROCM
+
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.cpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..317995c4f476357aa27c7e63d15791c79e0390dd
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.cpp
@@ -0,0 +1,174 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core.hpp>
+
+#if defined( KOKKOS_ENABLE_ROCM ) && defined( KOKKOS_ENABLE_TASKDAG )
+
+#include <impl/Kokkos_TaskQueue_impl.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+template class TaskQueue< Kokkos::Experimental::ROCm > ; // explicit instantiation of TaskQueue for the ROCm execution space
+
+
+//----------------------------------------------------------------------------
+KOKKOS_INLINE_FUNCTION
+void TaskQueueSpecialization< Kokkos::Experimental::ROCm >::driver
+  ( TaskQueueSpecialization< Kokkos::Experimental::ROCm >::queue_type * const queue,
+    hc::tiled_index<3> threadIdx )
+{
+  using Member = TaskExec< Kokkos::Experimental::ROCm > ;
+  using Queue  = TaskQueue< Kokkos::Experimental::ROCm > ;
+  using task_root_type = TaskBase< void , void , void > ;
+  // Per-thread loop: lane 0 pops ready tasks from `queue`, the pointer is broadcast, and the task is applied.
+  task_root_type * const end = (task_root_type *) task_root_type::EndTag ; // sentinel: "nothing popped yet" while the ready count is non-zero
+
+  Member single_exec( 1, threadIdx );
+  Member team_exec( threadIdx.tile_dim[0], threadIdx );
+
+  const int wavefront_lane = threadIdx.local[0] + threadIdx.local[1]* threadIdx.tile_dim[0] ;
+  // The pointer is overlaid with two ints so it can be broadcast as two shuffles below.
+  union { // NOTE(review): assumes sizeof(task_root_type*) == 2*sizeof(int) - confirm for the target
+    task_root_type * ptr ;
+    int              raw[2] ;
+  } task ;
+
+  // Loop until all queues are empty and no tasks in flight
+
+  do {
+
+    // Each team lead attempts to acquire either a thread team task
+    // or collection of single thread tasks for the team.
+
+    if ( 0 == wavefront_lane ) {
+      // Start from `end` when the ready count is positive, otherwise from 0 (which ends the loop below).
+      task.ptr = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ;
+
+      // Loop by priority and then type
+      for ( int i = 0 ; i < Queue::NumQueue && end == task.ptr ; ++i ) {
+        for ( int j = 0 ; j < 2 && end == task.ptr ; ++j ) {
+          task.ptr = Queue::pop_ready_task( & queue->m_ready[i][j] );
+        }
+      }
+
+#if 0
+printf("TaskQueue<ROCm>::driver(%d,%d) task(%lx)\n",threadIdx.z,blockIdx.x
+      , uintptr_t(task.ptr));
+#endif
+
+    }
+
+    // shuffle broadcast: both int halves of the pointer are taken from lane 0
+
+    task.raw[0] = hc::__shfl( task.raw[0] , 0 );
+    task.raw[1] = hc::__shfl( task.raw[1] , 0 );
+
+    if ( 0 == task.ptr ) break ; // 0 == queue->m_ready_count
+    // If the pointer is still `end`, nothing was popped this round: skip the work and poll again.
+    if ( end != task.ptr ) {
+      if ( task_root_type::TaskTeam == task.ptr->m_task_type ) {
+        // Thread Team Task
+        (*task.ptr->m_apply)( task.ptr , & team_exec );
+      }
+      else if ( 0 == threadIdx.local[1] ) {
+        // Single Thread Task
+        (*task.ptr->m_apply)( task.ptr , & single_exec );
+      }
+
+      if ( 0 == wavefront_lane ) {
+        queue->complete( task.ptr );
+      }
+    }
+  } while(1);
+}
+#if 0
+namespace {
+KOKKOS_INLINE_FUNCTION
+void rocm_task_queue_execute( TaskQueue< Kokkos::Experimental::ROCm > * queue, 
+                              hc::tiled_index<3> threadIdx )
+{ TaskQueueSpecialization< Kokkos::Experimental::ROCm >::driver( queue, threadIdx ); }
+
+}
+#endif
+void TaskQueueSpecialization< Kokkos::Experimental::ROCm >::execute
+  ( TaskQueue< Kokkos::Experimental::ROCm > * const queue )
+{
+  const int workgroups_per_wavefront = 4 ;
+  const int wavefront_size = Kokkos::Impl::ROCmTraits::WavefrontSize ;
+  const int cu_count = Kokkos::Impl::rocm_internal_cu_count();
+//  const dim3 grid( Kokkos::Impl::rocm_internal_cu_count() , 1 , 1 );
+//  const dim3 block( 1 , Kokkos::Impl::ROCmTraits::WorkGroupSize , workgroups_per_wavefront );
+  // NOTE(review): the kernel launch further down is compiled out (#if 0), so this function
+  // currently launches nothing; the three constants above are only consumed by that
+  // disabled code.
+  // Query the stack size, in bytes:
+  // If not large enough then set the stack size, in bytes:
+
+// Adapted from the cuda code.  TODO: not at all sure that this is the proper way
+// to map the cuda grid/blocks/3D tiling to HCC
+#if 0
+  hc::extent< 3 > flat_extent(  cu_count,
+                                wavefront_size, workgroups_per_wavefront );
+  hc::tiled_extent< 3 > team_extent = flat_extent.tile(1,
+                                wavefront_size,workgroups_per_wavefront);
+
+  hc::parallel_for_each( team_extent , [&](hc::tiled_index<3> idx) [[hc]]
+  {
+    TaskQueueSpecialization< Kokkos::Experimental::ROCm >::driver( queue,idx ); 
+  }).wait();
+#endif
+}
+
+
+}} /* namespace Kokkos::Impl */
+
+//----------------------------------------------------------------------------
+
+#endif /* #if defined( KOKKOS_ENABLE_ROCM ) && defined( KOKKOS_ENABLE_TASKDAG ) */
+
+
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..16badcc1d4d50e3f4bbd0ff387b327b30dc945dc
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.hpp
@@ -0,0 +1,458 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_IMPL_ROCM_TASK_HPP
+#define KOKKOS_IMPL_ROCM_TASK_HPP
+
+#if defined( KOKKOS_ENABLE_TASKDAG )
+
+#include <ROCm/Kokkos_ROCm_Vectorization.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+template< class > class TaskExec ; 
+
+template<>
+class TaskQueueSpecialization< Kokkos::Experimental::ROCm >
+{
+public:
+
+  using execution_space = Kokkos::Experimental::ROCm ;
+  using queue_type      = Kokkos::Impl::TaskQueue< execution_space > ;
+  using task_base_type  = Kokkos::Impl::TaskBase< execution_space , void , void > ;
+  using member_type     = TaskExec< execution_space > ;
+
+  // Must specify memory space (HostSpace for this specialization)
+  using memory_space = Kokkos::HostSpace ;
+  // No-op in this specialization (empty body).
+  static
+  void iff_single_thread_recursive_execute( queue_type * const ) {}
+  // Declared here only; the definition is not part of this header.
+  KOKKOS_INLINE_FUNCTION
+  static void driver( queue_type * const, hc::tiled_index<3> );
+
+  // Must provide task queue execution function
+  static void execute( queue_type * const );
+
+  // Must provide mechanism to set function pointer in
+  // execution space from the host process.
+  template< typename FunctorType >
+  static
+  void proc_set_apply( typename TaskBase< Kokkos::Experimental::ROCm
+                               , typename FunctorType::value_type
+                               , FunctorType
+                               >::function_type * ptr )
+    {
+      using TaskType = TaskBase< Kokkos::Experimental::ROCm
+                               , typename FunctorType::value_type
+                               , FunctorType
+                               > ;
+      hc::extent< 1 > flat_extent( 1 );
+      hc::tiled_extent< 1 > team_extent = flat_extent.tile( 1);
+      // Launch a single work-item that stores TaskType::apply through ptr, then wait.
+      hc::parallel_for_each( team_extent , [&](hc::tiled_index<1> idx) [[hc]]
+      {  // NOTE(review): ptr is captured by reference ([&]); confirm that is valid for an [[hc]] kernel
+         *ptr = TaskType::apply ;
+      }).wait();
+    }
+};
+
+/*template<>
+KOKKOS_FUNCTION 
+void TaskQueue<Kokkos::Experimental::ROCm>::decrement( typename TaskQueue<Kokkos::Experimental::ROCm>::task_root_type *
+) {}
+*/
+extern template class TaskQueue< Kokkos::Experimental::ROCm > ;
+
+//----------------------------------------------------------------------------
+/**\brief  Impl::TaskExec<ROCm> is the TaskScheduler<ROCm>::member_type
+ *         passed to tasks running in a ROCm space.
+ *
+ *  ROCm tiles for tasking are dimensioned:
+ *    idx.tile_dim[0] == vector length
+ *    idx.tile_dim[1] == team size
+ *    idx.tile_dim[2] == number of teams
+ *  where
+ *    idx.tile_dim[0] * idx.tile_dim[1] == WavefrontSize
+ *
+ *  Both single thread and thread team tasks are run by a full ROCm wavefront.
+ *  A single thread task is called by wavefront lane #0 and the remaining
+ *  lanes of the wavefront are idle.
+ */
+template<>
+class TaskExec< Kokkos::Experimental::ROCm >
+{
+private:
+  // Not movable or copyable; only the friend classes below can construct one.
+  TaskExec( TaskExec && ) = delete ;
+  TaskExec( TaskExec const & ) = delete ;
+  TaskExec & operator = ( TaskExec && ) = delete ;
+  TaskExec & operator = ( TaskExec const & ) = delete ;
+
+
+  friend class Kokkos::Impl::TaskQueue< Kokkos::Experimental::ROCm > ;
+  friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::Experimental::ROCm > ;
+  // Team size and tiled index, both supplied at construction.
+  int              m_team_size ;
+  hc::tiled_index<3>      m_idx;
+
+//  KOKKOS_INLINE_FUNCTION TaskExec( int arg_team_size )  //TODO: tile_dim[0]
+//    : m_team_size( arg_team_size ) {}
+
+  KOKKOS_INLINE_FUNCTION TaskExec( int arg_team_size,
+                                   hc::tiled_index<3> tidx)  
+    : m_team_size( arg_team_size),
+      m_idx( tidx ) {}
+
+public:
+  // Returns a copy of the stored tiled index; the nested parallel_reduce and
+  // parallel_scan overloads in this header read tile_dim[] and local[] from it.
+  // NOTE(review): not marked KOKKOS_INLINE_FUNCTION, unlike the other members.
+
+  hc::tiled_index<3> idx() const { return m_idx;}
+  // team_barrier() is a no-op in both branches; host builds return 0 for rank and size.
+#if defined( __HCC_ACCELERATOR__ )
+  KOKKOS_INLINE_FUNCTION void team_barrier() { /* __threadfence_block(); */ }
+  KOKKOS_INLINE_FUNCTION int  team_rank() const { return m_idx.local[1] ; } // t_idx.tile[0];
+  KOKKOS_INLINE_FUNCTION int  team_size() const { return m_team_size ; }
+#else
+  KOKKOS_INLINE_FUNCTION void team_barrier() {}
+  KOKKOS_INLINE_FUNCTION int  team_rank() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION int  team_size() const { return 0 ; }
+#endif
+};
+
+}} /* namespace Kokkos::Impl */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+namespace Kokkos {
+
+template<typename iType>
+KOKKOS_INLINE_FUNCTION
+Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::ROCm > >
+TeamThreadRange
+  ( Impl::TaskExec< Kokkos::Experimental::ROCm > & thread, const iType & count )
+{
+  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::ROCm > >(thread,count);
+}
+
+template<typename iType1, typename iType2>
+KOKKOS_INLINE_FUNCTION
+Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
+                                       Impl::TaskExec< Kokkos::Experimental::ROCm > >
+TeamThreadRange
+  ( Impl:: TaskExec< Kokkos::Experimental::ROCm > & thread, const iType1 & begin, const iType2 & end )
+{
+  typedef typename std::common_type<iType1, iType2>::type iType;
+  return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::TaskExec< Kokkos::Experimental::ROCm > >(thread, begin, end);
+}
+
+template<typename iType>
+KOKKOS_INLINE_FUNCTION
+Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::ROCm > >
+ThreadVectorRange
+  ( Impl::TaskExec< Kokkos::Experimental::ROCm > & thread
+  , const iType & count )
+{
+  return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::ROCm > >(thread,count);
+}
+
+/** \brief  Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
+ * Each caller steps from loop_boundaries.start to loop_boundaries.end (exclusive) by loop_boundaries.increment.
+ * The range i=0..N-1 is mapped to all threads of the calling thread team.
+ * This functionality requires C++11 support.
+*/
+template<typename iType, class Lambda>
+KOKKOS_INLINE_FUNCTION
+void parallel_for
+  ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Experimental::ROCm > >& loop_boundaries
+  , const Lambda& lambda
+  )
+{
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i);
+  }
+}
+
+// reduce across corresponding lanes between team members within workgroup
+// assume stride*team_size == workgroup_size
+template< typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void strided_shfl_workgroup_reduction
+  (const ValueType& f(),  // NOTE(review): declared as taking no arguments, yet called with two below
+   ValueType& val,
+   int team_size,
+   int stride)
+{
+  for (int lane_delta=(team_size*stride)>>1; lane_delta>=stride; lane_delta>>=1) {
+    f(val, Kokkos::shfl_down(val, lane_delta, team_size*stride));
+  }
+}
+
+template< typename ValueType, class JoinType >
+KOKKOS_INLINE_FUNCTION
+void strided_shfl_workgroup_reduction
+  (const JoinType& join,   // callable invoked as join(val, shuffled_val)
+   ValueType& val,
+   int team_size,
+   int stride)
+{
+  for (int lane_delta=(team_size*stride)>>1; lane_delta>=stride; lane_delta>>=1) {  // halve the lane distance each pass, down to stride
+    join(val, shfl_down(val, lane_delta, team_size*stride));
+  }
+}
+
+// multiple within-workgroup non-strided reductions
+template< typename ValueType, class JoinType >
+KOKKOS_INLINE_FUNCTION
+void multi_shfl_workgroup_reduction
+  (const JoinType& join,
+   ValueType& val,
+   int vec_length)
+{
+  for (int lane_delta=vec_length>>1; lane_delta; lane_delta>>=1) {
+    join(val, shfl_down(val, lane_delta, vec_length));
+  }
+}
+
+// broadcast within workgroup
+template< class ValueType >
+KOKKOS_INLINE_FUNCTION
+ValueType shfl_workgroup_broadcast
+  (ValueType& val,
+   int src_lane,
+   int width)
+{
+  return shfl(val, src_lane, width);
+}
+
+// all-reduce across corresponding vector lanes between team members within workgroup
+// assume vec_length*team_size == workgroup_size
+// idx.tile_dim[0] == vec_length == stride
+// idx.tile_dim[1] == team_size
+// idx.local[0] == position in vec
+// idx.local[1] == member number
+
+template<typename iType, class Lambda, typename ValueType>
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce
+  ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Experimental::ROCm > >& loop_boundaries
+  , const Lambda& lambda
+  , ValueType& initialized_result)
+{
+  int team_rank = loop_boundaries.thread.team_rank(); // member num within the team
+  ValueType result = initialized_result;
+  hc::tiled_index<3> idx = loop_boundaries.thread.idx();
+  // Each caller first accumulates its own iterations into a private copy.
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i, result);
+  }
+  initialized_result = result;
+  // Combine the partial results with operator+=, then broadcast the reduced value.
+  strided_shfl_workgroup_reduction(
+                          [&] (ValueType& val1, const ValueType& val2) { val1 += val2; },
+                          initialized_result,
+                          loop_boundaries.thread.team_size(),
+                          idx.tile_dim[0]);
+  initialized_result = shfl_workgroup_broadcast<ValueType>( initialized_result, idx.local[0], Impl::ROCmTraits::WavefrontSize );
+
+}
+
+template< typename iType, class Lambda, typename ValueType, class JoinType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce
+  (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::ROCm > >& loop_boundaries,
+   const Lambda & lambda,
+   const JoinType & join,
+   ValueType& initialized_result)
+{
+  hc::tiled_index<3> idx = loop_boundaries.thread.idx();
+  ValueType result = initialized_result;
+
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i, result);
+  }
+  // Publish the partial result; without this the loop's work never reaches the join below.
+  initialized_result = result;
+  strided_shfl_workgroup_reduction<ValueType, JoinType>(
+                          join,
+                          initialized_result,
+                          loop_boundaries.thread.team_size(),
+                          idx.tile_dim[0]);
+  initialized_result = shfl_workgroup_broadcast<ValueType>( initialized_result, idx.local[0], Impl::ROCmTraits::WavefrontSize );
+}
+
+// Sum reduction over a ThreadVectorRange; every lane of the vector receives the result.
+template< typename iType, class Lambda, typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce
+  (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::ROCm > >& loop_boundaries,
+   const Lambda & lambda,
+   ValueType& initialized_result)
+{
+  ValueType result = initialized_result;
+  hc::tiled_index<3> idx = loop_boundaries.thread.idx();
+
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i,result);
+  }
+
+  initialized_result = result;
+
+  // Sum (operator+=) across the idx.tile_dim[0] vector lanes, then broadcast lane 0's value.
+  multi_shfl_workgroup_reduction(
+                          [&] (ValueType& val1, const ValueType& val2) { val1 += val2; },
+                          initialized_result,
+                          idx.tile_dim[0]);
+  initialized_result = shfl_workgroup_broadcast<ValueType>( initialized_result, 0, idx.tile_dim[0] );
+}
+
+// Reduction over a ThreadVectorRange that combines lane results with the caller-supplied join.
+template< typename iType, class Lambda, typename ValueType, class JoinType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce
+  (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::ROCm > >& loop_boundaries,
+   const Lambda & lambda,
+   const JoinType & join,
+   ValueType& initialized_result)
+{
+  hc::tiled_index<3> idx = loop_boundaries.thread.idx();
+  ValueType result = initialized_result;
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i,result);
+  }
+  initialized_result = result;
+  // Join across the idx.tile_dim[0] vector lanes, then broadcast lane 0's value.
+  multi_shfl_workgroup_reduction<ValueType, JoinType>(join, initialized_result, idx.tile_dim[0]);
+  initialized_result = shfl_workgroup_broadcast<ValueType>( initialized_result, 0, idx.tile_dim[0] );
+}
+
+template< typename ValueType, typename iType, class Lambda >
+KOKKOS_INLINE_FUNCTION
+void parallel_scan
+  (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::ROCm > >& loop_boundaries,
+   const Lambda & lambda)
+{
+  hc::tiled_index<3> idx = loop_boundaries.thread.idx();
+  ValueType accum = 0 ;
+  ValueType val, y, local_total;
+  // NOTE: ValueType appears in no function parameter, so callers must name it explicitly.
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    val = 0;
+    lambda(i,val,false);
+
+    // scan 'val' across lanes that are idx.tile_dim[0] apart
+    // accum = running total carried over from earlier loop iterations
+
+    // INCLUSIVE scan
+    for( int offset = idx.tile_dim[0] ; offset < Impl::ROCmTraits::WavefrontSize ; offset <<= 1 ) {
+      y = shfl_up(val, offset, Impl::ROCmTraits::WavefrontSize);
+      if(idx.local[1]*idx.tile_dim[0] >= offset) { val += y; }
+    }
+
+    // broadcast this iteration's total (local_total) to all threads
+    local_total = shfl_workgroup_broadcast<ValueType>(val,
+                                            idx.local[0]+Impl::ROCmTraits::WavefrontSize-idx.tile_dim[0],
+                                            Impl::ROCmTraits::WavefrontSize);
+
+    // make EXCLUSIVE scan by shifting values over one
+    val = shfl_up(val, idx.tile_dim[0], Impl::ROCmTraits::WavefrontSize);
+    if ( idx.local[1] == 0 ) { val = 0 ; }
+
+    val += accum;
+    lambda(i,val,true);
+    accum += local_total;
+  }
+}
+
+// NOTE(review): ValueType is the last template parameter and is not deducible, so calls must give all three template arguments.
+template< typename iType, class Lambda, typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void parallel_scan
+  (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::ROCm > >& loop_boundaries,
+   const Lambda & lambda)
+{
+  hc::tiled_index<3> idx = loop_boundaries.thread.idx();
+  ValueType accum = 0 ;
+  ValueType val, y, local_total;
+
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    val = 0;
+    lambda(i,val,false);
+
+    // scan 'val' across the idx.tile_dim[0] lanes of the vector
+    // accum = running total carried over from earlier loop iterations
+
+    // INCLUSIVE scan
+    for( int offset = 1 ; offset < idx.tile_dim[0] ; offset <<= 1 ) {
+      y = shfl_up(val, offset, idx.tile_dim[0]);
+      if(idx.local[0] >= offset) { val += y; }
+    }
+
+    // broadcast this iteration's total (value held by the last lane) to all threads
+    local_total = shfl_workgroup_broadcast<ValueType>(val, idx.tile_dim[0]-1, 
+                                                 idx.tile_dim[0]);
+
+    // make EXCLUSIVE scan by shifting values over one
+    val = shfl_up(val, 1, idx.tile_dim[0]);
+    if ( idx.local[0] == 0 ) { val = 0 ; }
+
+    val += accum;
+    lambda(i,val,true);
+    accum += local_total;
+  }
+}
+
+
+} /* namespace Kokkos */
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
+#endif /* #ifndef KOKKOS_IMPL_ROCM_TASK_HPP */
+
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e1a89e379462d0580ae0ac3b53937d454d155ea8
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp
@@ -0,0 +1,518 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <hc.hpp>
+#include <type_traits>
+#include <vector>
+#include <memory>
+#include <ROCm/Kokkos_ROCm_Config.hpp>
+
+#if !defined( KOKKOS_ROCM_TILE_H )
+#define KOKKOS_ROCM_TILE_H
+
+// Macro to abstract out the enable_if craziness
+#define KOKKOS_ROCM_REQUIRES(...) \
+    bool KokkosROCmRequiresBool ## __LINE__ = true, typename std::enable_if<KokkosROCmRequiresBool ## __LINE__ && (__VA_ARGS__), int>::type = 0
+
+// This number uniquely identifies the 1.5 release build.  
+#if __hcc_workweek__ > 17160
+#define ROCM15 1
+#endif
+
+namespace Kokkos {
+namespace Impl {
+
+template<class T>
+
+#if defined(ROCM15)
+using lds_t = T;
+#else
+// prior to 1.5, needed to decorate LDS addresses
+using lds_t = __attribute__((address_space(3))) T;
+#endif
+
+#define KOKKOS_ROCM_TILE_RESTRIC_CPU restrict(cpu, amp)
+
+// a set of routines to replace the std:: routines
+// that will operate on address space 3 types
+
+#if defined(ROCM15)
+// 1.5 can't use std::copy et al for LDS access, so we define our own 
+// set of routines
+template<class I, class O>
+void rcopy(I first, I last, O out) [[hc]]
+{
+    while (first != last) *out++ = *first++;
+}
+template<class I,class F>
+void rfor_each(I first, I last, F f) [[hc]]
+{
+  for(;first!=last;++first) f(*first);
+}
+
+template<class I,class O,class F>
+void rtransform(I first, I last, O out, F f) [[hc]]
+{
+  while(first!=last) *out++ = f(*first++);
+}
+#endif
+
+
+inline std::size_t get_max_tile_size() KOKKOS_ROCM_TILE_RESTRIC_CPU
+{
+    return hc::accelerator().get_max_tile_static_size() - 1024;
+}
+
+inline std::size_t get_max_tile_thread() KOKKOS_ROCM_TILE_RESTRIC_CPU
+{
+    return 64;
+}
+
+inline int next_pow_2(int x) restrict(cpu, amp)
+{
+    // For positive x: smallest power of two >= x (exact powers map to themselves).
+    int v = x - 1;
+    // Smear the highest set bit of v into every lower bit position.
+    for (int shift = 1; shift <= 16; shift <<= 1) {
+        v |= v >> shift;
+    }
+    return v + 1;
+}
+
+template<class T>
+inline std::size_t get_tile_size(std::size_t n = 1,
+                                 std::size_t team = 64,
+                                 std::size_t vector = 1)
+                                 KOKKOS_ROCM_TILE_RESTRIC_CPU
+{
+    const std::size_t bytes_per_thread = sizeof(T) * n;
+    const std::size_t tile_budget = get_max_tile_size();
+    if (bytes_per_thread == 0 || bytes_per_thread > tile_budget) { return 0; }
+    // Start from team*vector threads (assumed to be a power of 2), capped at 4x the
+    // max tile thread count; halve while a thread's share of the budget is too small.
+    std::size_t threads = std::min(team*vector, 4*get_max_tile_thread());
+    // Halving stops at 2 threads even if the per-thread share is still too small.
+    while (bytes_per_thread > (tile_budget / threads) && threads > 2) {
+        threads /= 2;
+    }
+
+    return threads;
+}
+
+template<class T>
+struct array_view
+{
+    T* x;
+    std::size_t n;
+
+    array_view(T* xp, std::size_t np) [[hc]] [[cpu]] 
+    : x(xp), n(np)
+    {}
+
+    array_view(T* xp, T* yp) [[hc]] [[cpu]] 
+    : x(xp), n(yp-xp)
+    {}
+
+    T& operator[](std::size_t i) const [[hc]] [[cpu]]
+    {
+        return x[i];
+    }
+
+    std::size_t size() const [[hc]] [[cpu]]
+    {
+        return this->n;
+    }
+
+    T* data() const [[hc]] [[cpu]]
+    {
+        return x;
+    }
+
+    T* begin() const [[hc]] [[cpu]]
+    {
+        return x;
+    }
+
+    T* end() const [[hc]] [[cpu]]
+    {
+        return x+this->size();
+    }
+};
+
+template<class T>
+struct rocm_char
+{ using type=char; };
+
+template<class T>
+struct rocm_char<const T>
+: std::add_const<typename rocm_char<T>::type>
+{};
+#if !defined(ROCM15)
+// earlier compilers required explicit address space decorations
+template<class T>
+struct rocm_char<__attribute__((address_space(3))) T>
+{ using type = __attribute__((address_space(3))) typename rocm_char<T>::type; };
+
+template<class T>
+struct rocm_char<const __attribute__((address_space(3))) T>
+{ using type = const __attribute__((address_space(3))) typename rocm_char<T>::type; };
+#endif
+
+template<class T, class Char=typename rocm_char<T>::type>
+Char* rocm_byte_cast(T& x) restrict(cpu, amp)
+{
+    return reinterpret_cast<Char*>(&x);
+}
+
+template<class T, class U>
+void rocm_raw_assign(T& x, const U& y) restrict(cpu, amp)
+{
+    auto * src = rocm_byte_cast(y);
+    auto * dest = rocm_byte_cast(x);
+#if defined (ROCM15)
+    rcopy(src, src+sizeof(T), dest);
+#else
+    std::copy(src, src+sizeof(T), dest);
+#endif
+}
+
+template<class T, class U>
+void rocm_assign_impl(T& x, const U& y, std::true_type) restrict(cpu, amp)
+{
+    rocm_raw_assign(x, y);
+}
+
+template<class T, class U>
+void rocm_assign_impl(T& x, const U& y, std::false_type) restrict(cpu, amp)
+{
+    x = y;
+}
+
+// Workaround for assigning in and out of LDS memory
+template<class T, class U>
+void rocm_assign(T& x, const U& y) restrict(cpu, amp)
+{
+    rocm_assign_impl(x, y, std::integral_constant<bool, (
+        sizeof(T) == sizeof(U)
+    )>());
+}
+
+// Compute the address space of tile
+template<class T>
+struct tile_type
+{
+#if defined (ROCM15)
+    typedef T type;
+#else
+    typedef __attribute__((address_space(3))) T type;
+#endif
+};
+
+#if !defined (ROCM15)
+template<class T, class Body>
+void lds_for(__attribute__((address_space(3))) T& value, Body b) [[hc]]
+{
+    T state = value;
+    b(state);
+    value = state;
+}
+#endif
+
+template<class T, class Body>
+void lds_for(T& value, Body b) [[hc]]
+{
+    b(value);
+}
+
+
+constexpr std::size_t get_max_tile_array_size()
+{
+    return 24;
+}
+
+template<class Derived, class T>
+struct single_action
+{
+    template<class Action>
+    void action_at(std::size_t i, Action a) [[hc]]
+    {
+        auto& value = static_cast<Derived&>(*this)[i];
+#if KOKKOS_ROCM_HAS_WORKAROUNDS
+        T state = value;
+        a(state);
+        value = state;
+#else
+        a(value);
+#endif
+    }
+
+    template<class Action>
+    void action_at(std::size_t i, std::size_t j, Action a) [[hc]]
+    {
+        static_cast<Derived&>(*this).action_at(i, [&](T& x)
+        {
+            static_cast<Derived&>(*this).action_at(j, [&](T& y)
+            {
+                a(x, y);
+            });
+        });
+    }
+};
+
+template<class T>
+struct tile_buffer
+: array_view<typename tile_type<T>::type>, single_action<tile_buffer<T>, T>
+{
+    typedef typename tile_type<T>::type element_type;
+    typedef array_view<element_type> base;
+
+    using base::base;
+
+    tile_buffer(element_type* xp, std::size_t np, std::size_t) [[hc]] [[cpu]] 
+    : base(xp, np)
+    {}
+
+    tile_buffer(T* xp, T* yp, std::size_t) [[hc]] [[cpu]] 
+    : base(xp, yp)
+    {}
+};
+
+template<class T>
+struct tile_buffer<T[]>
+{
+    typedef typename tile_type<T>::type element_type;
+    typedef typename tile_type<char>::type tchar_type;
+    element_type* element_data;
+    std::size_t n, m;
+
+    tile_buffer(element_type* xp, std::size_t np, std::size_t mp) [[hc]] [[cpu]] 
+    : element_data(xp), n(np), m(mp)
+    {}
+
+    tile_buffer(element_type* xp, element_type* yp, std::size_t mp) [[hc]] [[cpu]] 
+    : element_data(xp), n(yp-xp), m(mp)
+    {}
+
+    element_type* operator[](std::size_t i) const [[hc]] [[cpu]]
+    {
+        return element_data+i*m;
+    }
+
+    template<class Action, class Q = T>
+    typename Impl::enable_if< (sizeof(Q) <= 8) , void >::type
+    action_at(std::size_t i, Action a) [[hc]]
+    {
+        element_type* value = (*this)[i];
+#if defined (ROCM15)
+        a(value);
+#else
+#if KOKKOS_ROCM_HAS_WORKAROUNDS
+        if (m > get_max_tile_array_size()) return;
+        T state[get_max_tile_array_size()];
+        // std::copy(value, value+m, state);
+        // Workaround for assigning from LDS memory
+        std::transform(value, value+m, state, [](element_type& x)
+        {
+          T result;
+          rocm_assign(result, x);
+          return result;
+        });
+        a(state);
+        std::copy(state, state+m, value);
+#endif
+#endif
+    }
+
+    template<class Action, class Q = T>
+    typename Impl::enable_if< !(sizeof(Q) <= 8) , void >::type
+    action_at(std::size_t i, Action a) [[hc]]
+    {
+        element_type* value = (*this)[i];
+#if defined (ROCM15)
+        a(value);
+#else
+//#if KOKKOS_ROCM_HAS_WORKAROUNDS
+        if (m > get_max_tile_array_size()) return;
+        T state[get_max_tile_array_size()];
+        // std::copy(value, value+m, state);
+        // Workaround for assigning from LDS memory
+        std::transform(value, value+m, state, [](element_type& x)
+        {
+          T result;
+          rocm_assign(result, x);
+          return result;
+        });
+        a(state);
+        // this workaround required when T is greater than 8 bytes
+        tile_static char tv[64*sizeof(T)];
+        size_t sT = sizeof(T);
+        for (int j = 0; j<sT; j++) tv[i*sT+j] = ((char *)state)[j];
+        for (int j = 0; j<sT; j++) ((tchar_type *)value)[j] = tv[i*sT+j];
+#endif
+    }
+
+    template<class Action>
+    void action_at(std::size_t i, std::size_t j, Action a) [[hc]]
+    {
+        this->action_at(i, [&](T* x)
+        {
+            this->action_at(j, [&](T* y)
+            {
+                a(x, y);
+            });
+        });
+    }
+
+    std::size_t size() const [[hc]] [[cpu]]
+    {
+        return this->n;
+    }
+
+    element_type* data() const [[hc]] [[cpu]]
+    {
+        return element_data;
+    }
+};
+
+// Zero initialize LDS memory
+struct zero_init_f
+{
+    template<class T>
+#if defined (ROCM15)
+    void operator()(T& x, std::size_t=1) const [[hc]]
+    {
+        auto * start = reinterpret_cast<char*>(&x);
+        for(int i=0; i<sizeof(T);i++) start[i] = 0;
+        rocm_raw_assign(x, T());
+    }
+#else
+    void operator()(__attribute__((address_space(3))) T& x, std::size_t=1) const [[hc]]
+    {
+        auto * start = reinterpret_cast<__attribute__((address_space(3))) char*>(&x);
+        std::fill(start, start+sizeof(T), 0);
+        rocm_raw_assign(x, T());
+    }
+#endif
+
+    template<class T>
+#if defined (ROCM15)
+    void operator()(T* x, std::size_t size) const [[hc]]
+    {
+        rfor_each(x, x+size, *this);
+    }
+#else
+    void operator()(__attribute__((address_space(3))) T* x, std::size_t size) const [[hc]]
+    {
+        std::for_each(x, x+size, *this);
+    }
+#endif
+};
+
+static constexpr zero_init_f zero_init = {};
+
+struct tile_desc
+{
+    // Number of work items, or size of extent
+    std::size_t elements;
+    // number of threads in team 
+    std::size_t team_size;
+    // vector length of team
+    std::size_t vector_length;
+    // Size of tile
+    std::size_t tile_size;
+    // Size of array
+    std::size_t array_size;
+    // Number of tiles
+    std::size_t num_tiles;
+    // Per team reserved LDS memory, used for reduction
+    std::size_t reduce_size;
+    // Per team shared memory in LDS, this in addition to reduce shared mem
+    std::size_t shared_size;
+    std::size_t size;
+};
+
+template<class T>
+tile_desc get_tile_desc(std::size_t size,
+                        std::size_t array_size=1,
+                        std::size_t team_size=64,
+                        std::size_t vector_size=1,
+                        std::size_t shared_size=0)
+{
+    tile_desc result;
+    result.elements = size;
+    result.array_size = array_size;
+    result.vector_length = vector_size;
+    result.team_size = team_size;
+    result.tile_size = get_tile_size<T>(array_size,team_size,vector_size);
+    // Exact integer ceil(size / tile_size); get_tile_size() may return 0, which yields zero tiles.
+    result.num_tiles = result.tile_size ? (size / result.tile_size + (size % result.tile_size != 0)) : 0;
+    result.reduce_size = result.tile_size * sizeof(T) * array_size;
+    result.shared_size = shared_size;
+    result.size = result.tile_size * result.num_tiles;  // extent padded up to whole tiles
+    return result;
+}
+
+template<class U, class F, class T=typename std::remove_extent<U>::type>
+hc::completion_future tile_for(tile_desc td, F f) 
+{
+    assert(td.array_size <= get_max_tile_array_size() && "Exceed max array size");
+    assert(td.tile_size != 0 && ((td.size % td.tile_size) == 0) && "Extent must be divisible by a non-zero tile size");
+    auto grid = hc::extent<1>(td.size).tile_with_dynamic(
+                          td.tile_size, td.reduce_size + td.shared_size);
+    // Each work-item zero-initializes its own tile_buffer entry before f runs.
+    return parallel_for_each(grid, [=](hc::tiled_index<1> t_idx) [[hc]] 
+    {
+#if defined (ROCM15)
+        typedef T group_t;
+#else
+        typedef __attribute__((address_space(3))) T group_t;
+#endif
+        group_t * buffer = (group_t *)hc::get_dynamic_group_segment_base_pointer();
+        tile_buffer<U> tb(buffer, td.tile_size, td.array_size);
+        zero_init(tb[t_idx.local[0]], td.array_size);
+        f(t_idx, tb);
+    });
+}
+
+}}
+
+#endif
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Vectorization.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Vectorization.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ac166f9ad8d7ed4a214b5d710ba4b632455e5968
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Vectorization.hpp
@@ -0,0 +1,346 @@
+/*
+//@HEADER
+// ************************************************************************
+// 
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+// 
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+// 
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_ROCM_VECTORIZATION_HPP
+#define KOKKOS_ROCM_VECTORIZATION_HPP
+
+#include <Kokkos_Macros.hpp>
+
+/* only compile this file if ROCM is enabled for Kokkos */
+#ifdef KOKKOS_ENABLE_ROCM
+
+#include <Kokkos_ROCm.hpp>
+
+namespace Kokkos {
+using namespace hc;
+
+// Shuffle only makes sense on >= Fiji GPUs; it doesn't work on CPUs
+// or other GPUs.  We provide a generic definition (which is trivial
+// and doesn't do what it claims to do) because we don't actually use
+// this function unless we are on a suitable GPU, with a suitable
+// Scalar type.  (For example, in the mat-vec, the "ThreadsPerRow"
+// internal parameter depends both on the ExecutionSpace and the Scalar type,
+// and it controls whether shfl_down() gets called.)
+namespace Impl {
+
+  // Holds a Scalar as n 32-bit float words so it can be shuffled one word at a time.
+  // NOTE(review): n truncates, so sizeof(Scalar) is assumed to be a multiple of 4;
+  // for other sizes the trailing bytes are never copied -- confirm against callers.
+  template< typename Scalar >
+  struct shfl_union {
+    enum {n = sizeof(Scalar)/4};
+    float fval[n];
+    KOKKOS_INLINE_FUNCTION
+    Scalar value() {
+      return *(Scalar*) fval;
+    }
+    // Copies value_ into fval word by word; the source is only read.
+    KOKKOS_INLINE_FUNCTION
+    void operator= (Scalar& value_) {
+      const float* const val_ptr = reinterpret_cast<const float*>(&value_);
+      for(int i=0; i<n ; i++) fval[i] = val_ptr[i];
+    }
+    // Same copy for const sources, without casting the const away.
+    KOKKOS_INLINE_FUNCTION
+    void operator= (const Scalar& value_) {
+      const float* const val_ptr = reinterpret_cast<const float*>(&value_);
+      for(int i=0; i<n ; i++) fval[i] = val_ptr[i];
+    }
+  };
+}
+
+#ifdef __HCC_ACCELERATOR__
+
+    // Returns int word 0 of val's storage by punning through a union. Which half that is
+    // depends on byte order; assumes long is as wide as two ints -- TODO confirm for the target.
+    KOKKOS_INLINE_FUNCTION
+    int __long2loint(const long val ) {
+      union { long whole; int word[2]; } pun;
+      pun.whole = val;
+      const int first_word = pun.word[0];
+      return first_word;
+    }
+
+    // Returns int word 1 of val's storage by punning through a union. Which half that is
+    // depends on byte order; assumes long is as wide as two ints -- TODO confirm for the target.
+    KOKKOS_INLINE_FUNCTION
+    int __long2hiint(const long val ) {
+      union { long whole; int word[2]; } pun;
+      pun.whole = val;
+      const int second_word = pun.word[1];
+      return second_word;
+    }
+
+    // Returns int word 0 of a double's storage by punning through a union.
+    // Which half of the bit pattern that is depends on the target's byte order.
+    KOKKOS_INLINE_FUNCTION
+    int __double2loint(const double val ) {
+      union { double whole; int word[2]; } pun;
+      pun.whole = val;
+      const int first_word = pun.word[0];
+      return first_word;
+    }
+
+    // Returns int word 1 of a double's storage by punning through a union.
+    // Which half of the bit pattern that is depends on the target's byte order.
+    KOKKOS_INLINE_FUNCTION
+    int __double2hiint(const double val ) {
+      union { double whole; int word[2]; } pun;
+      pun.whole = val;
+      const int second_word = pun.word[1];
+      return second_word;
+    }
+
+    // Rebuilds a long from two int words through a union: lo is stored as word 0 and
+    // hi as word 1, the same word layout __long2loint / __long2hiint read back.
+    KOKKOS_INLINE_FUNCTION
+    long __hiloint2long(const int hi, const int lo ) {
+      union { long whole; int word[2]; } pun;
+      pun.word[1] = hi;
+      pun.word[0] = lo;
+      const long joined = pun.whole;
+      return joined;
+    }
+
+    // Rebuilds a double from two int words through a union: lo is stored as word 0 and
+    // hi as word 1, the same word layout __double2loint / __double2hiint read back.
+    KOKKOS_INLINE_FUNCTION
+    double __hiloint2double(const int hi, const int lo ) {
+      union { double whole; int word[2]; } pun;
+      pun.word[1] = hi;
+      pun.word[0] = lo;
+      const double joined = pun.whole;
+      return joined;
+    }
+
+    KOKKOS_INLINE_FUNCTION  // int overload: forwards directly to the __shfl intrinsic
+    int shfl(const int &val, const int& srcLane, const int& width ) {
+      return __shfl(val,srcLane,width);
+    }
+    // float overload: likewise a direct call to __shfl.
+    KOKKOS_INLINE_FUNCTION
+    float shfl(const float &val, const int& srcLane, const int& width ) {
+      return __shfl(val,srcLane,width);
+    }
+    // Other 4-byte Scalars (gated by the enable_if on width's type): shuffled as a float bit pattern.
+    template<typename Scalar>
+    KOKKOS_INLINE_FUNCTION
+    Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type& width
+        ) {
+      Scalar tmp1 = val;  // non-const copy whose address can be reinterpreted
+      float tmp = *reinterpret_cast<float*>(&tmp1);
+      tmp = __shfl(tmp,srcLane,width);
+      return *reinterpret_cast<Scalar*>(&tmp);
+    }
+    // double overload: split into two int words, shuffle each, then reassemble.
+    KOKKOS_INLINE_FUNCTION
+    double shfl(const double &val, const int& srcLane, const int& width) {
+      int lo = __double2loint(val);
+      int hi = __double2hiint(val);
+      lo = __shfl(lo,srcLane,width);
+      hi = __shfl(hi,srcLane,width);
+      return __hiloint2double(hi,lo);
+    }
+    // Other 8-byte Scalars: the bits are viewed as a double and shuffled as two int words.
+    template<typename Scalar>
+    KOKKOS_INLINE_FUNCTION
+    Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 8) ,int>::type& width) {
+      int lo = __double2loint(*reinterpret_cast<const double*>(&val));
+      int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
+      lo = __shfl(lo,srcLane,width);
+      hi = __shfl(hi,srcLane,width);
+      const double tmp = __hiloint2double(hi,lo);
+      return *(reinterpret_cast<const Scalar*>(&tmp));
+    }
+    // Scalars larger than 8 bytes: copied into a shfl_union and shuffled one float word at a time.
+    template<typename Scalar>
+    KOKKOS_INLINE_FUNCTION
+    Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) > 8) ,int>::type& width) {
+      Impl::shfl_union<Scalar> s_val;
+      Impl::shfl_union<Scalar> r_val;
+      s_val = val;
+      // s_val.n is sizeof(Scalar)/4 words; each one goes through __shfl independently.
+      for(int i = 0; i<s_val.n; i++)
+        r_val.fval[i] = __shfl(s_val.fval[i],srcLane,width);
+      return r_val.value();
+    }
+
+    KOKKOS_INLINE_FUNCTION  // int overload: forwards directly to the __shfl_down intrinsic
+    int shfl_down(const int &val, const int& delta, const int& width) {
+      return __shfl_down(val,delta,width);
+    }
+    // float overload: likewise a direct call to __shfl_down.
+    KOKKOS_INLINE_FUNCTION
+    float shfl_down(const float &val, const int& delta, const int& width) {
+      return __shfl_down(val,delta,width);
+    }
+    // Other 4-byte Scalars (gated by the enable_if on width's type): shuffled as a float bit pattern.
+    template<typename Scalar>
+    KOKKOS_INLINE_FUNCTION
+    Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
+      Scalar tmp1 = val;  // non-const copy whose address can be reinterpreted
+      float tmp = *reinterpret_cast<float*>(&tmp1);
+      tmp = __shfl_down(tmp,delta,width);
+      return *reinterpret_cast<Scalar*>(&tmp);
+    }
+    // long overload: split into two int words, shuffle each, then reassemble.
+    KOKKOS_INLINE_FUNCTION
+    long shfl_down(const long &val, const int& delta, const int& width) {
+      int lo = __long2loint(val);
+      int hi = __long2hiint(val);
+      lo = __shfl_down(lo,delta,width);
+      hi = __shfl_down(hi,delta,width);
+      return __hiloint2long(hi,lo);
+    }
+    // double overload: same two-word scheme using the double split/join helpers.
+    KOKKOS_INLINE_FUNCTION
+    double shfl_down(const double &val, const int& delta, const int& width) {
+      int lo = __double2loint(val);
+      int hi = __double2hiint(val);
+      lo = __shfl_down(lo,delta,width);
+      hi = __shfl_down(hi,delta,width);
+      return __hiloint2double(hi,lo);
+    }
+    // Other 8-byte Scalars: the bits are viewed as a double and shuffled as two int words.
+    template<typename Scalar>
+    KOKKOS_INLINE_FUNCTION
+    Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
+      int lo = __double2loint(*reinterpret_cast<const double*>(&val));
+      int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
+      lo = __shfl_down(lo,delta,width);
+      hi = __shfl_down(hi,delta,width);
+      const double tmp = __hiloint2double(hi,lo);
+      return *(reinterpret_cast<const Scalar*>(&tmp));
+    }
+    // Scalars larger than 8 bytes: copied into a shfl_union and shuffled one float word at a time.
+    template<typename Scalar>
+    KOKKOS_INLINE_FUNCTION
+    Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) {
+      Impl::shfl_union<Scalar> s_val;
+      Impl::shfl_union<Scalar> r_val;
+      s_val = val;
+      // s_val.n is sizeof(Scalar)/4 words; each one goes through __shfl_down independently.
+      for(int i = 0; i<s_val.n; i++)
+        r_val.fval[i] = __shfl_down(s_val.fval[i],delta,width);
+      return r_val.value();
+    }
+
+    KOKKOS_INLINE_FUNCTION  // int overload: forwards directly to the __shfl_up intrinsic
+    int shfl_up(const int &val, const int& delta, const int& width ) {
+      return __shfl_up(val,delta,width);
+    }
+    // float overload: likewise a direct call to __shfl_up.
+    KOKKOS_INLINE_FUNCTION
+    float shfl_up(const float &val, const int& delta, const int& width ) {
+      return __shfl_up(val,delta,width);
+    }
+    // Other 4-byte Scalars (gated by the enable_if on width's type): shuffled as a float bit pattern.
+    template<typename Scalar>
+    KOKKOS_INLINE_FUNCTION
+    Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
+      Scalar tmp1 = val;  // non-const copy whose address can be reinterpreted
+      float tmp = *reinterpret_cast<float*>(&tmp1);
+      tmp = __shfl_up(tmp,delta,width);
+      return *reinterpret_cast<Scalar*>(&tmp);
+    }
+    // double overload: split into two int words, shuffle each, then reassemble.
+    KOKKOS_INLINE_FUNCTION
+    double shfl_up(const double &val, const int& delta, const int& width ) {
+      int lo = __double2loint(val);
+      int hi = __double2hiint(val);
+      lo = __shfl_up(lo,delta,width);
+      hi = __shfl_up(hi,delta,width);
+      return __hiloint2double(hi,lo);
+    }
+    // Other 8-byte Scalars: the bits are viewed as a double and shuffled as two int words.
+    template<typename Scalar>
+    KOKKOS_INLINE_FUNCTION
+    Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
+      int lo = __double2loint(*reinterpret_cast<const double*>(&val));
+      int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
+      lo = __shfl_up(lo,delta,width);
+      hi = __shfl_up(hi,delta,width);
+      const double tmp = __hiloint2double(hi,lo);
+      return *(reinterpret_cast<const Scalar*>(&tmp));
+    }
+    // Scalars larger than 8 bytes: copied into a shfl_union and shuffled one float word at a time.
+    template<typename Scalar>
+    KOKKOS_INLINE_FUNCTION
+    Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) {
+      Impl::shfl_union<Scalar> s_val;
+      Impl::shfl_union<Scalar> r_val;
+      s_val = val;
+      // s_val.n is sizeof(Scalar)/4 words; each one goes through __shfl_up independently.
+      for(int i = 0; i<s_val.n; i++)
+        r_val.fval[i] = __shfl_up(s_val.fval[i],delta,width);
+      return r_val.value();
+    }
+
+#else
+    // Non-accelerator fallback for shfl: calls Kokkos::abort when width > 1, otherwise returns val.
+    template<typename Scalar> inline
+    Scalar shfl(const Scalar &val, const int& srcLane, const int& width) {
+      if(1 < width) { Kokkos::abort("Error: calling shfl from a device with CC<8.0."); }
+      return val;
+    }
+
+    // Non-accelerator fallback for shfl_down: calls Kokkos::abort when width > 1, otherwise returns val.
+    template<typename Scalar> inline
+    Scalar shfl_down(const Scalar &val, const int& delta, const int& width) {
+      if(1 < width) { Kokkos::abort("Error: calling shfl_down from a device with CC<8.0."); }
+      return val;
+    }
+
+    // Non-accelerator fallback for shfl_up: calls Kokkos::abort when width > 1, otherwise returns val.
+    template<typename Scalar> inline
+    Scalar shfl_up(const Scalar &val, const int& delta, const int& width) {
+      if(width > 1) Kokkos::abort("Error: calling shfl_up from a device with CC<8.0.");  // names this function
+      return val;
+    }
+#endif
+
+
+
+}
+
+#endif // KOKKOS_ENABLE_ROCM
+#endif
diff --git a/lib/kokkos/core/src/ROCm/hc_math_std.hpp b/lib/kokkos/core/src/ROCm/hc_math_std.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..56c2e634e45b7e7e6ae9af8e244a017217e21850
--- /dev/null
+++ b/lib/kokkos/core/src/ROCm/hc_math_std.hpp
@@ -0,0 +1,367 @@
+#pragma once
+
+#include "hc.hpp"
+#include <cmath>
+
+// Math functions with integer overloads will be converted to
+// this floating point type.
+#define HC_IMPLICIT_FLOAT_CONV double
+
+#ifdef __KALMAR_ACCELERATOR__
+// __KALMAR_ACCELERATOR__ branch: every wrapper forwards to hc::precise_math. One argument, T function(T).
+#define HC_MATH_WRAPPER_1(function, arg1) \
+template<typename T> \
+inline T function(T arg1) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1); \
+}
+// In this branch the KALMAR_ spelling simply expands to HC_MATH_WRAPPER_1.
+#define KALMAR_MATH_WRAPPER_1(function, arg1) HC_MATH_WRAPPER_1(function, arg1)
+// One-argument pair: integral T is cast to HC_IMPLICIT_FLOAT_CONV first; floating-point T passes through.
+#define HC_MATH_WRAPPER_FP_OVERLOAD_1(function, arg1) \
+template<typename T> \
+inline \
+typename std::enable_if<std::is_integral<T>::value,HC_IMPLICIT_FLOAT_CONV>::type \
+ function(T arg1) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(static_cast<HC_IMPLICIT_FLOAT_CONV>(arg1)); \
+} \
+template<typename T> \
+inline \
+typename std::enable_if<std::is_floating_point <T>::value,T>::type \
+ function(T arg1) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1); \
+}
+// In this branch the KALMAR_ spelling simply expands to HC_MATH_WRAPPER_FP_OVERLOAD_1.
+#define KALMAR_MATH_WRAPPER_FP_OVERLOAD_1(function, arg1) HC_MATH_WRAPPER_FP_OVERLOAD_1(function, arg1) 
+// Two arguments of the same type T, returning T.
+#define HC_MATH_WRAPPER_2(function, arg1, arg2) \
+template<typename T> \
+inline T function(T arg1, T arg2) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1, arg2); \
+}
+// Like HC_MATH_WRAPPER_2, but the generated function is named `alias` and calls `function`.
+#define HC_MATH_ALIAS_2(alias, function, arg1, arg2) \
+template<typename T> \
+inline T alias(T arg1, T arg2) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1, arg2); \
+}
+// Three arguments of the same type T, returning T.
+#define HC_MATH_WRAPPER_3(function, arg1, arg2, arg3) \
+template<typename T> \
+inline T function(T arg1, T arg2, T arg3) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1, arg2, arg3); \
+}
+// Takes Q, returns T. T appears only in the return type, so it cannot be deduced from the argument.
+#define HC_MATH_WRAPPER_TQ(function, arg1) \
+template<typename T, typename Q> \
+inline T function(Q arg1) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1); \
+}
+// Fixed return type T (a macro argument here): integral Q is cast to HC_IMPLICIT_FLOAT_CONV, floating Q passes through.
+#define HC_MATH_WRAPPER_FP_OVERLOAD_TQ(function, T, arg1) \
+template<typename Q> \
+inline \
+typename std::enable_if<std::is_integral<Q>::value,T>::type \
+function(Q arg1) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(static_cast<HC_IMPLICIT_FLOAT_CONV>(arg1)); \
+}\
+template<typename Q> \
+inline \
+typename std::enable_if<std::is_floating_point<Q>::value,T>::type \
+function(Q arg1) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1); \
+}
+// Two arguments of independent types (T, Q), returning T.
+#define HC_MATH_WRAPPER_TTQ(function, arg1, arg2) \
+template<typename T, typename Q> \
+inline T function(T arg1, Q arg2) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1, arg2); \
+}
+// Two-argument pair: if either type is integral both are cast to HC_IMPLICIT_FLOAT_CONV; two floating types return T.
+#define HC_MATH_WRAPPER_FP_OVERLOAD_TTQ(function, arg1, arg2) \
+template<typename T, typename Q> \
+inline \
+typename std::enable_if<std::is_integral<T>::value||std::is_integral<Q>::value,HC_IMPLICIT_FLOAT_CONV>::type \
+function(T arg1, Q arg2) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(static_cast<HC_IMPLICIT_FLOAT_CONV>(arg1),static_cast<HC_IMPLICIT_FLOAT_CONV>(arg2)); \
+}\
+template<typename T, typename Q> \
+inline \
+typename std::enable_if<std::is_floating_point<T>::value&&std::is_floating_point<Q>::value,T>::type \
+function(T arg1, Q arg2) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1,arg2); \
+}
+// Three arguments (T, T, Q), returning T.
+#define HC_MATH_WRAPPER_TTTQ(function, arg1, arg2, arg3) \
+template<typename T, typename Q> \
+inline T function(T arg1, T arg2, Q arg3) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1, arg2, arg3); \
+}
+// Three arguments (T, Q, Q) with no return value; the wrapped call's result, if any, is discarded.
+#define HC_MATH_WRAPPER_VTQQ(function, arg1, arg2, arg3) \
+template<typename T, typename Q> \
+inline void function(T arg1, Q arg2, Q arg3) __attribute__((hc,cpu)) { \
+  hc::precise_math::function(arg1, arg2, arg3); \
+}
+
+#else
+// #else branch (no __KALMAR_ACCELERATOR__): HC_ wrappers forward to std::. One argument, T function(T).
+#define HC_MATH_WRAPPER_1(function, arg1) \
+template<typename T> \
+inline T function(T arg1) __attribute__((hc,cpu)) { \
+  return std::function(arg1); \
+}
+// The KALMAR_ variant still forwards to hc::precise_math in this branch, unlike HC_MATH_WRAPPER_1.
+#define KALMAR_MATH_WRAPPER_1(function, arg1) \
+template<typename T> \
+inline T function(T arg1) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1); \
+}
+// NOTE(review): the integral overload calls ::function (global namespace) but the floating one calls std::function -- confirm intent.
+#define HC_MATH_WRAPPER_FP_OVERLOAD_1(function, arg1) \
+template<typename T> \
+inline \
+typename std::enable_if<std::is_integral<T>::value,HC_IMPLICIT_FLOAT_CONV>::type \
+ function(T arg1) __attribute__((hc,cpu)) { \
+  return ::function(static_cast<HC_IMPLICIT_FLOAT_CONV>(arg1)); \
+} \
+template<typename T> \
+inline \
+typename std::enable_if<std::is_floating_point <T>::value,T>::type \
+ function(T arg1) __attribute__((hc,cpu)) { \
+  return std::function(arg1); \
+} 
+// KALMAR_ pair: same integral/floating split, but both overloads forward to hc::precise_math.
+#define KALMAR_MATH_WRAPPER_FP_OVERLOAD_1(function, arg1) \
+template<typename T> \
+inline \
+typename std::enable_if<std::is_integral<T>::value,HC_IMPLICIT_FLOAT_CONV>::type \
+ function(T arg1) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(static_cast<HC_IMPLICIT_FLOAT_CONV>(arg1)); \
+} \
+template<typename T> \
+inline \
+typename std::enable_if<std::is_floating_point <T>::value,T>::type \
+ function(T arg1) __attribute__((hc,cpu)) { \
+  return hc::precise_math::function(arg1); \
+}
+// Two arguments of the same type T, returning T; forwards to std::function.
+#define HC_MATH_WRAPPER_2(function, arg1, arg2) \
+template<typename T> \
+inline T function(T arg1, T arg2) __attribute__((hc,cpu)) { \
+  return std::function(arg1, arg2); \
+}
+// Like HC_MATH_WRAPPER_2, but the generated function is named `alias` and calls std::function.
+#define HC_MATH_ALIAS_2(alias, function, arg1, arg2) \
+template<typename T> \
+inline T alias(T arg1, T arg2) __attribute__((hc,cpu)) { \
+  return std::function(arg1, arg2); \
+}
+// Three arguments of the same type T, returning T.
+#define HC_MATH_WRAPPER_3(function, arg1, arg2, arg3) \
+template<typename T> \
+inline T function(T arg1, T arg2, T arg3) __attribute__((hc,cpu)) { \
+  return std::function(arg1, arg2, arg3); \
+}
+// Takes Q, returns T. T appears only in the return type, so it cannot be deduced from the argument.
+#define HC_MATH_WRAPPER_TQ(function, arg1) \
+template<typename T, typename Q> \
+inline T function(Q arg1) __attribute__((hc,cpu)) { \
+  return std::function(arg1); \
+}
+// NOTE(review): this is the only wrapper in this branch attributed (hc) rather than (hc,cpu) -- confirm that is deliberate.
+#define HC_MATH_WRAPPER_FP_OVERLOAD_TQ(function, T, arg1) \
+template<typename Q> \
+inline \
+typename std::enable_if<std::is_integral<Q>::value,T>::type \
+function(Q arg1) __attribute__((hc)) { \
+  return std::function(static_cast<HC_IMPLICIT_FLOAT_CONV>(arg1)); \
+}\
+template<typename Q> \
+inline \
+typename std::enable_if<std::is_floating_point<Q>::value,T>::type \
+function(Q arg1) __attribute__((hc)) { \
+  return std::function(arg1); \
+}
+// Two arguments of independent types (T, Q), returning T.
+#define HC_MATH_WRAPPER_TTQ(function, arg1, arg2) \
+template<typename T, typename Q> \
+inline T function(T arg1, Q arg2) __attribute__((hc,cpu)) { \
+  return std::function(arg1, arg2); \
+}
+// Two-argument pair: if either type is integral both are cast to HC_IMPLICIT_FLOAT_CONV; two floating types return T.
+#define HC_MATH_WRAPPER_FP_OVERLOAD_TTQ(function, arg1, arg2) \
+template<typename T, typename Q> \
+inline \
+typename std::enable_if<std::is_integral<T>::value||std::is_integral<Q>::value,HC_IMPLICIT_FLOAT_CONV>::type \
+function(T arg1, Q arg2) __attribute__((hc,cpu)) { \
+  return std::function(static_cast<HC_IMPLICIT_FLOAT_CONV>(arg1),static_cast<HC_IMPLICIT_FLOAT_CONV>(arg2)); \
+}\
+template<typename T, typename Q> \
+inline \
+typename std::enable_if<std::is_floating_point<T>::value&&std::is_floating_point<Q>::value,T>::type \
+function(T arg1, Q arg2) __attribute__((hc,cpu)) { \
+  return std::function(arg1,arg2); \
+}
+// Three arguments (T, T, Q), returning T.
+#define HC_MATH_WRAPPER_TTTQ(function, arg1, arg2, arg3) \
+template<typename T, typename Q> \
+inline T function(T arg1, T arg2, Q arg3) __attribute__((hc,cpu)) { \
+  return std::function(arg1, arg2, arg3); \
+}
+// Three arguments (T, Q, Q) with no return value; the wrapped call's result, if any, is discarded.
+#define HC_MATH_WRAPPER_VTQQ(function, arg1, arg2, arg3) \
+template<typename T, typename Q> \
+inline void function(T arg1, Q arg2, Q arg3) __attribute__((hc,cpu)) { \
+  std::function(arg1, arg2, arg3); \
+}
+
+#endif
+
+
+// Add templated wrappers for the math functions to namespace std (generated by the macros above)
+namespace std {
+
+// following math functions are NOT available because they don't have a GPU implementation
+//
+// erfinv
+// erfcinv
+// fpclassify
+// 
+// following math functions are NOT available because they don't have a CPU implementation
+//
+// cospif
+// cospi
+// rsqrtf
+// rsqrt
+// sinpif
+// sinpi
+// tanpi
+//
+
+HC_MATH_WRAPPER_TQ(ilogbf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_TQ(ilogb, int, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_TQ(isfinite, bool, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_TQ(isinf, bool, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_TQ(isnan, bool, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_TQ(isnormal, bool, x)
+HC_MATH_WRAPPER_TQ(nanf, tagp)
+HC_MATH_WRAPPER_TQ(nan, tagp)
+//HC_MATH_WRAPPER_TQ(signbitf, x)
+HC_MATH_WRAPPER_TQ(signbit, x)
+HC_MATH_WRAPPER_TTQ(frexpf, x, exp)
+HC_MATH_WRAPPER_TTQ(frexp, x, exp)
+HC_MATH_WRAPPER_TTQ(ldexpf, x, exp)
+HC_MATH_WRAPPER_TTQ(ldexp, x, exp)
+HC_MATH_WRAPPER_TTQ(lgammaf, x, exp)
+HC_MATH_WRAPPER_TTQ(lgamma, x, exp)
+HC_MATH_WRAPPER_TTQ(modff, x, exp)
+HC_MATH_WRAPPER_TTQ(modf, x, exp)
+HC_MATH_WRAPPER_TTQ(scalbnf, x, exp)
+HC_MATH_WRAPPER_TTQ(scalbn, x, exp)
+HC_MATH_WRAPPER_TTTQ(remquof, x, y, quo)
+HC_MATH_WRAPPER_TTTQ(remquo, x, y, quo)
+HC_MATH_WRAPPER_VTQQ(sincosf, x, s, c)
+HC_MATH_WRAPPER_VTQQ(sincos, x, s, c)
+
+HC_MATH_WRAPPER_1(acosf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(acos, x)
+HC_MATH_WRAPPER_1(acoshf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(acosh, x)
+HC_MATH_WRAPPER_1(asinf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(asin, x)
+HC_MATH_WRAPPER_1(asinhf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(asinh, x)
+HC_MATH_WRAPPER_1(atanf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(atan, x)
+HC_MATH_WRAPPER_1(atanhf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(atanh, x)
+HC_MATH_WRAPPER_2(atan2f, x, y)
+HC_MATH_WRAPPER_2(atan2, x, y)
+HC_MATH_WRAPPER_1(cbrtf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(cbrt, x)
+HC_MATH_WRAPPER_1(ceilf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(ceil, x)
+HC_MATH_WRAPPER_2(copysignf, x, y)
+HC_MATH_WRAPPER_2(copysign, x, y)
+HC_MATH_WRAPPER_1(cosf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(cos, x)
+HC_MATH_WRAPPER_1(coshf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(cosh, x)
+KALMAR_MATH_WRAPPER_1(cospif, x)
+KALMAR_MATH_WRAPPER_FP_OVERLOAD_1(cospi, x)
+HC_MATH_WRAPPER_1(erff, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(erf, x)
+HC_MATH_WRAPPER_1(erfcf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(erfc, x)
+HC_MATH_WRAPPER_1(expf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(exp, x)
+HC_MATH_WRAPPER_1(exp2f, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(exp2, x)
+HC_MATH_WRAPPER_1(exp10f, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(exp10, x)
+HC_MATH_WRAPPER_1(expm1f, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(expm1, x)
+HC_MATH_WRAPPER_1(fabsf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(fabs, x)
+HC_MATH_WRAPPER_2(fdimf, x, y)
+HC_MATH_WRAPPER_2(fdim, x, y)
+HC_MATH_WRAPPER_1(floorf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(floor, x)
+HC_MATH_WRAPPER_3(fmaf, x, y, z)
+HC_MATH_WRAPPER_3(fma, x, y, z)
+HC_MATH_WRAPPER_2(fmaxf, x, y)
+HC_MATH_WRAPPER_2(fmax, x, y)
+HC_MATH_WRAPPER_2(fminf, x, y)
+HC_MATH_WRAPPER_2(fmin, x, y)
+HC_MATH_WRAPPER_2(fmodf, x, y)
+HC_MATH_WRAPPER_2(fmod, x, y)
+HC_MATH_WRAPPER_2(hypotf, x, y)
+HC_MATH_WRAPPER_2(hypot, x, y)
+HC_MATH_WRAPPER_1(logf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(log, x)
+HC_MATH_WRAPPER_1(log10f, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(log10, x)
+HC_MATH_WRAPPER_1(log2f, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(log2, x)
+HC_MATH_WRAPPER_1(log1pf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(log1p, x)
+HC_MATH_WRAPPER_1(logbf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(logb, x)
+HC_MATH_WRAPPER_1(nearbyintf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(nearbyint, x)
+HC_MATH_WRAPPER_2(nextafterf, x, y)
+HC_MATH_WRAPPER_2(nextafter, x, y)
+HC_MATH_WRAPPER_2(powf, x, y)
+HC_MATH_WRAPPER_FP_OVERLOAD_TTQ(pow,x,y)
+//HC_MATH_WRAPPER_1(rcbrtf, x)
+//HC_MATH_WRAPPER_1(rcbrt, x)
+HC_MATH_WRAPPER_2(remainderf, x, y)
+HC_MATH_WRAPPER_2(remainder, x, y)
+HC_MATH_WRAPPER_1(roundf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(round, x)
+KALMAR_MATH_WRAPPER_1(rsqrtf, x)
+KALMAR_MATH_WRAPPER_FP_OVERLOAD_1(rsqrt, x)
+HC_MATH_WRAPPER_2(scalbf, x, exp)
+HC_MATH_WRAPPER_2(scalb, x, exp)
+HC_MATH_WRAPPER_1(sinf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(sin, x)
+HC_MATH_WRAPPER_1(sinhf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(sinh, x)
+KALMAR_MATH_WRAPPER_1(sinpif, x)
+KALMAR_MATH_WRAPPER_FP_OVERLOAD_1(sinpi, x)
+HC_MATH_WRAPPER_1(sqrtf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(sqrt, x)
+HC_MATH_WRAPPER_1(tgammaf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(tgamma, x)
+HC_MATH_WRAPPER_1(tanf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(tan, x)
+HC_MATH_WRAPPER_1(tanhf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(tanh, x)
+HC_MATH_WRAPPER_1(truncf, x)
+HC_MATH_WRAPPER_FP_OVERLOAD_1(trunc, x)
+
+//HC_MATH_ALIAS_2(min, fmin, x, y)
+//HC_MATH_ALIAS_2(max, fmax, x, y)
+
+} // namespace std
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
index 5b894b037b83ce8095a78ad094f9c33ef1020d42..49fca9c8551886346432658ba638450506ad5eff 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
@@ -125,6 +125,7 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
 //----------------------------------------------------------------------------
 // GCC native CAS supports int, long, unsigned int, unsigned long.
 // Intel native CAS support int and long with the same interface as GCC.
+#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS)
 #if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
 #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
 
@@ -280,6 +281,7 @@ T atomic_compare_exchange( volatile T * const dest, const T compare, const T val
 
 #endif
 #endif
+#endif // !defined ROCM_ATOMICS
 
 template <typename T>
 KOKKOS_INLINE_FUNCTION
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
index 084c55efedd20b77dbd1fefa86e8f970a4302473..2af1737c31f8137901fbf3c4d8cab0bc1b88a3f9 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
@@ -158,6 +158,7 @@ T atomic_fetch_add( volatile T * const dest ,
 #endif
 #endif
 //----------------------------------------------------------------------------
+#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS)
 #if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
 #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
 
@@ -355,6 +356,7 @@ T atomic_fetch_add( volatile T * const dest , const T val )
 
 #endif
 #endif
+#endif // !defined ROCM_ATOMICS
 //----------------------------------------------------------------------------
 
 // Simpler version of atomic_fetch_add without the fetch
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
index 038cc13e9aa400d93679febb34370bb0fb729adc..b7c14052eb4519f522c5b6a2a724f5aa5ad976f5 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
@@ -135,6 +135,7 @@ T atomic_fetch_sub( volatile T * const dest ,
 #endif
 #endif
 //----------------------------------------------------------------------------
+#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS)
 #if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
 #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
 
@@ -263,6 +264,8 @@ T atomic_fetch_sub( volatile T * const dest , const T val )
 
 #endif
 #endif
+#endif // !defined ROCM_ATOMICS
+
 // Simpler version of atomic_fetch_sub without the fetch
 template <typename T>
 KOKKOS_INLINE_FUNCTION
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
index 65578156d5be52232785d93744c9025c814bd6a8..f47ba1a98a9225aa50f54ebfbb27ab052dbd2b3b 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
@@ -238,7 +238,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
   *dest = Oper::apply(return_val, val);
   Impl::unlock_address_host_space( (void*) dest );
   return return_val;
-#else
+#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
   // This is a way to (hopefully) avoid dead lock in a warp
   T return_val;
   int done = 0;
@@ -277,7 +277,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
   *dest = return_val;
   Impl::unlock_address_host_space( (void*) dest );
   return return_val;
-#else
+#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
   T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
   int done = 0;
diff --git a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
index df16b3738b7abfea350ca4375c4b0e84973877b4..3d3029535e9c770b65cbe07af5f6256fd01ada0d 100644
--- a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
@@ -62,6 +62,8 @@ int bit_first_zero( unsigned i ) noexcept
 
 #if defined( __CUDA_ARCH__ )
   return full != i ? __ffs( ~i ) - 1 : -1 ;
+#elif defined( __HCC_ACCELERATOR__ )
+  return full != i ? (int)hc::__lastbit_u32_u32(~i) : -1 ; // lastbit scans from the LSB, matching __ffs(~i) - 1
 #elif defined( KOKKOS_COMPILER_INTEL )
   return full != i ? _bit_scan_forward( ~i ) : -1 ;
 #elif defined( KOKKOS_COMPILER_IBM )
@@ -82,6 +84,8 @@ int bit_scan_forward( unsigned i )
 {
 #if defined( __CUDA_ARCH__ )
   return __ffs(i) - 1;
+#elif defined( __HCC_ACCELERATOR__ )
+  return  (int)hc::__lastbit_u32_u32(i); // lowest set bit: lastbit scans from the LSB, matching __ffs(i) - 1
 #elif defined( KOKKOS_COMPILER_INTEL )
   return _bit_scan_forward(i);
 #elif defined( KOKKOS_COMPILER_IBM )
@@ -106,6 +110,8 @@ int bit_scan_reverse( unsigned i )
   enum { shift = static_cast<int>( sizeof(unsigned) * CHAR_BIT - 1 ) };
 #if defined( __CUDA_ARCH__ )
   return shift - __clz(i);
+#elif defined( __HCC_ACCELERATOR__ )
+  return i ? shift - (int)hc::__firstbit_u32_u32(i) : -1 ; // firstbit counts from the MSB like __clz; 0 -> -1 as in the CUDA branch
 #elif defined( KOKKOS_COMPILER_INTEL )
   return _bit_scan_reverse(i);
 #elif defined( KOKKOS_COMPILER_IBM )
@@ -130,6 +136,8 @@ int bit_count( unsigned i )
 {
 #if defined( __CUDA_ARCH__ )
   return __popc(i);
+#elif defined( __HCC_ACCELERATOR__ )
+  return  (int)hc::__popcount_u32_b32(i);
 #elif defined ( __INTEL_COMPILER )
   return _popcnt32(i);
 #elif defined( KOKKOS_COMPILER_IBM )
diff --git a/lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp b/lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp
index 92111c3c59263eeeeccdb59982febfa7755d7d62..c59c59d4979247a9131835c8ee41393a5f5e3836 100644
--- a/lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp
@@ -72,6 +72,10 @@ uint64_t clock_tic(void) noexcept
 
   return clock64();
 
+#elif defined(__HCC_ACCELERATOR__)
+    // Get clock register
+    return hc::__clock_u64();
+
 #elif defined( __i386__ ) || defined( __x86_64 )
 
   // Return value of 64-bit hi-res clock register.
diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp
index f52cc469ace10cd795b3fa1389732e292f4357ee..ab6cffc7c3c18ef03b3f2f22689c7b8b8923e909 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp
@@ -80,7 +80,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
   const int num_threads = args.num_threads;
   const int use_numa = args.num_numa;
 #endif // defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS )
-#if defined( KOKKOS_ENABLE_CUDA )
+#if defined( KOKKOS_ENABLE_CUDA ) || defined( KOKKOS_ENABLE_ROCM )
   const int use_gpu = args.device_id;
 #endif // defined( KOKKOS_ENABLE_CUDA )
 
@@ -162,6 +162,18 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
   }
 #endif
 
+#if defined( KOKKOS_ENABLE_ROCM )
+  if( std::is_same< Kokkos::Experimental::ROCm , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
+    if (use_gpu > -1) {
+      Kokkos::Experimental::ROCm::initialize( Kokkos::Experimental::ROCm::SelectDevice( use_gpu ) );
+    }
+    else {
+      Kokkos::Experimental::ROCm::initialize();
+    }
+    std::cout << "Kokkos::initialize() fyi: ROCm enabled and initialized" << std::endl ;
+  }
+#endif
+
 #if defined(KOKKOS_ENABLE_PROFILING)
     Kokkos::Profiling::initialize();
 #endif
@@ -181,6 +193,13 @@ void finalize_internal( const bool all_spaces = false )
   }
 #endif
 
+#if defined( KOKKOS_ENABLE_ROCM )
+  if( std::is_same< Kokkos::Experimental::ROCm , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
+    if(Kokkos::Experimental::ROCm::is_initialized())
+      Kokkos::Experimental::ROCm::finalize();
+  }
+#endif
+
 #if defined( KOKKOS_ENABLE_OPENMPTARGET )
   if( std::is_same< Kokkos::Experimental::OpenMPTarget , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
     if(Kokkos::Experimental::OpenMPTarget::is_initialized())
@@ -225,6 +244,12 @@ void fence_internal()
   }
 #endif
 
+#if defined( KOKKOS_ENABLE_ROCM )
+  if( std::is_same< Kokkos::Experimental::ROCm , Kokkos::DefaultExecutionSpace >::value ) {
+    Kokkos::Experimental::ROCm::fence();
+  }
+#endif
+
 #if defined( KOKKOS_ENABLE_OPENMP )
   if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
       std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.hpp b/lib/kokkos/core/src/impl/Kokkos_Error.hpp
index 4bc2637c57980502c4fe7b8a5e1c4f86db43bb86..b4390f14a16d14bd7fafcfbecb805071bec8696e 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Error.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Error.hpp
@@ -75,7 +75,7 @@ void abort( const char * const message ) {
 #ifdef __CUDA_ARCH__
   Kokkos::Impl::cuda_abort(message);
 #else
-  #ifndef KOKKOS_ENABLE_OPENMPTARGET
+  #if !defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(__HCC_ACCELERATOR__)
     Kokkos::Impl::host_abort(message);
   #endif
 #endif
diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp
index d2446bde09ad7697fb23d11d469fe1566dde8fac..047b262422853deb7e0c95a94f0544cc6d664240 100644
--- a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp
@@ -275,7 +275,9 @@ int HostThreadTeamData::rendezvous( int64_t * const buffer
       for ( int i = 0 ; i < end ; ++i ) {
         ((int8_t*) & value )[i] = int8_t( step );
       }
-
+      // Do not REMOVE this store fence!!!
+      // Makes stuff hang on GCC with more than 8 threads
+      store_fence();
       spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ]
                           , value );
     }
diff --git a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
index 7a887a9e29f55372a65c5c2653a14d2f0d4c5ec5..eedf3d559eaabfe960dd6e3214efd194bb7eb0b6 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
@@ -53,6 +53,8 @@ void memory_fence()
 {
 #if defined( __CUDA_ARCH__ )
   __threadfence();
+#elif defined( KOKKOS_ENABLE_ROCM_ATOMICS )
+  amp_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
 #elif defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
   asm volatile (
 	  "mfence" ::: "memory"
diff --git a/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp b/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp
index 15ce6964a0369980df0124021062b08caa5f9e39..a408199088c5e0b7bd67e3944b72af35d96cef55 100644
--- a/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp
@@ -140,12 +140,6 @@
 #endif
 #endif
 
-#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
-#ifndef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
-#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
-#endif
-#endif
-
 #ifdef KOKKOS_HAVE_CXX1Z
 #ifndef KOKKOS_ENABLE_CXX1Z
 #define KOKKOS_ENABLE_CXX1Z KOKKOS_HAVE_CXX1Z
diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp
index 1974f7e1cae62eca6a1f692476786623f4993800..0cce45b2e710f337f26b2790afe014722a2386e7 100644
--- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp
@@ -456,10 +456,11 @@ void TaskQueue< ExecSpace >::schedule_aggregate
   //     task->m_next == member of linked list (queue)
 
 #if KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING
-  printf( "schedule_aggregate( 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
+  printf( "schedule_aggregate( 0x%lx { 0x%lx 0x%lx %d %d %d %d }\n"
         , uintptr_t(task)
         , uintptr_t(task->m_wait)
         , uintptr_t(task->m_next)
+        , task->m_dep_count
         , task->m_task_type
         , task->m_priority
         , task->m_ref_count );
@@ -597,7 +598,6 @@ void TaskQueue< ExecSpace >::complete
         , task->m_task_type
         , task->m_priority
         , task->m_ref_count );
-  fflush( stdout );
 #endif
 
   task_root_type volatile & t = *task ;
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
index d346f9e6393530f4489d027db2c4d7f9892b15d1..b2adcc9f067582dacd7474cb366be0da2cde6482 100644
--- a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
@@ -1015,8 +1015,13 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft
   constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
     : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
     {
-      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
-                   , "ViewOffset LayoutLeft and LayoutRight are only compatible when rank == 1" );
+      static_assert(
+        ( DimRHS::rank == 0 &&
+          dimension_type::rank == 0 ) ||
+        ( DimRHS::rank == 1 &&
+          dimension_type::rank == 1 &&
+          dimension_type::rank_dynamic == 1 )
+        , "ViewOffset LayoutLeft and LayoutRight are only compatible when rank <= 1" );
     }
 
   template< class DimRHS >
@@ -1024,8 +1029,13 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft
   ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
     : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
     {
-      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
-                   , "ViewOffset LayoutLeft and LayoutStride are only compatible when rank == 1" );
+      static_assert( 
+        ( DimRHS::rank == 0 &&
+          dimension_type::rank == 0 ) ||
+        ( DimRHS::rank == 1 &&
+          dimension_type::rank == 1 &&
+          dimension_type::rank_dynamic == 1 )
+        , "ViewOffset LayoutLeft and LayoutStride are only compatible when rank <= 1" );
       if ( rhs.m_stride.S0 != 1 ) {
         Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft from LayoutStride  requires stride == 1" );
       }
@@ -1493,8 +1503,13 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight
   constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
     : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
     {
-      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
-                   , "ViewOffset LayoutRight and LayoutLeft are only compatible when rank == 1" );
+      static_assert(
+       ( DimRHS::rank == 0 &&
+         dimension_type::rank == 0 ) ||
+       ( DimRHS::rank == 1 &&
+         dimension_type::rank == 1 &&
+         dimension_type::rank_dynamic == 1 )
+      , "ViewOffset LayoutRight and LayoutLeft are only compatible when rank <= 1" );
     }
 
   template< class DimRHS >
@@ -1502,8 +1517,13 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight
   ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
     : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
     {
-      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
-                   , "ViewOffset LayoutLeft/Right and LayoutStride are only compatible when rank == 1" );
+      static_assert(  // LayoutRight may be constructed from LayoutStride only when both sides are rank 0 or both are rank 1
+       ( DimRHS::rank == 0 &&
+         dimension_type::rank == 0 ) ||  // rank-0 source and destination
+       ( DimRHS::rank == 1 &&
+         dimension_type::rank == 1 &&
+         dimension_type::rank_dynamic == 1 )  // rank-1 source and destination, destination extent dynamic
+      , "ViewOffset LayoutRight and LayoutStride are only compatible when rank <= 1" );
       if ( rhs.m_stride.S0 != 1 ) {
         Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft/Right from LayoutStride  requires stride == 1" );
       }
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp
index 5a8600e0aee34719e512229292ecb37d9b950657..37367f68e4bbb1c7e9a01c395ab7fdf7729bdae4 100644
--- a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp
@@ -143,7 +143,7 @@ public:
 
   //----------------------------------------
 
-  ~ViewOffset() = default ;
+  KOKKOS_FUNCTION_DEFAULTED ~ViewOffset() = default ;
   KOKKOS_INLINE_FUNCTION ViewOffset() = default ;
   KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default ;
   KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default ;
diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt
index 475b6bb48a2eee3583da12aa4baae3f3ab4e8536..8aeae1199f46b701d7a79fe2541821ba09b2f7f9 100644
--- a/lib/kokkos/core/unit_test/CMakeLists.txt
+++ b/lib/kokkos/core/unit_test/CMakeLists.txt
@@ -57,6 +57,7 @@ IF(Kokkos_ENABLE_Serial)
       serial/TestSerial_ViewMapping_b.cpp
       serial/TestSerial_ViewMapping_subview.cpp
       serial/TestSerial_ViewOfClass.cpp
+      serial/TestSerial_Crs.cpp
       serial/TestSerial_WorkGraph.cpp
     COMM serial mpi
     NUM_MPI_PROCS 1
@@ -103,6 +104,7 @@ IF(Kokkos_ENABLE_Pthread)
       threads/TestThreads_ViewMapping_b.cpp
       threads/TestThreads_ViewMapping_subview.cpp
       threads/TestThreads_ViewOfClass.cpp
+      threads/TestThreads_Crs.cpp
       threads/TestThreads_WorkGraph.cpp
     COMM serial mpi
     NUM_MPI_PROCS 1
@@ -149,6 +151,7 @@ IF(Kokkos_ENABLE_OpenMP)
       openmp/TestOpenMP_ViewMapping_b.cpp
       openmp/TestOpenMP_ViewMapping_subview.cpp
       openmp/TestOpenMP_ViewOfClass.cpp
+      openmp/TestOpenMP_Crs.cpp
       openmp/TestOpenMP_WorkGraph.cpp
       openmp/TestOpenMP_UniqueToken.cpp
     COMM serial mpi
@@ -241,6 +244,7 @@ IF(Kokkos_ENABLE_Cuda)
       cuda/TestCuda_ViewMapping_b.cpp
       cuda/TestCuda_ViewMapping_subview.cpp
       cuda/TestCuda_ViewOfClass.cpp
+      cuda/TestCuda_Crs.cpp
       cuda/TestCuda_WorkGraph.cpp
     COMM serial mpi
     NUM_MPI_PROCS 1
diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile
index c877aa7dd2d252e0d19970becbc4deaac41f2d45..07859f7ac37afc1bf209feba1ece15214745a7f3 100644
--- a/lib/kokkos/core/unit_test/Makefile
+++ b/lib/kokkos/core/unit_test/Makefile
@@ -10,6 +10,8 @@ vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmp
 vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmptarget
 vpath %.cpp ${KOKKOS_PATH}/core/unit_test/qthreads
 vpath %.cpp ${KOKKOS_PATH}/core/unit_test/cuda
+vpath %.cpp ${KOKKOS_PATH}/core/unit_test/rocm
+
 
 TEST_HEADERS = $(wildcard $(KOKKOS_PATH)/core/unit_test/*.hpp)
 TEST_HEADERS += $(wildcard $(KOKKOS_PATH)/core/unit_test/*/*.hpp)
@@ -62,6 +64,7 @@ endif
 	OBJ_CUDA += TestCuda_TeamReductionScan.o
 	OBJ_CUDA += TestCuda_Other.o
 	OBJ_CUDA += TestCuda_MDRange.o
+	OBJ_CUDA += TestCuda_Crs.o
 	OBJ_CUDA += TestCuda_Task.o TestCuda_WorkGraph.o
 	OBJ_CUDA += TestCuda_Spaces.o
 	OBJ_CUDA += TestCuda_UniqueToken.o
@@ -71,6 +74,58 @@ endif
 	TEST_TARGETS += test-cuda
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) # object list for the ROCm unit-test executable
+        OBJ_ROCM = UnitTestMainInit.o gtest-all.o
+        OBJ_ROCM += TestROCm_Init.o
+        OBJ_ROCM += TestROCm_Complex.o
+#        OBJ_ROCM += TestROCm_RangePolicy.o
+# rocm.range_scan locking up
+        OBJ_ROCM += TestROCm_AtomicOperations.o
+        OBJ_ROCM += TestROCm_Atomics.o
+# complex failing
+        OBJ_ROCM += TestROCm_AtomicViews.o
+        OBJ_ROCM += TestROCm_Other.o
+# rocm.memory_pool
+        OBJ_ROCM += TestROCm_Scan.o
+        OBJ_ROCM += TestROCm_SharedAlloc.o
+        OBJ_ROCM += TestROCm_SubView_a.o
+#        OBJ_ROCM += TestROCm_SubView_b.o
+# relies on host accessible device memory
+#        OBJ_ROCM += TestROCm_SubView_c01.o
+#        OBJ_ROCM += TestROCm_SubView_c02.o
+#        OBJ_ROCM += TestROCm_SubView_c03.o
+#        OBJ_ROCM += TestROCm_SubView_c04.o
+#        OBJ_ROCM += TestROCm_SubView_c05.o
+#        OBJ_ROCM += TestROCm_SubView_c06.o
+#        OBJ_ROCM += TestROCm_SubView_c07.o
+#        OBJ_ROCM += TestROCm_SubView_c08.o
+#        OBJ_ROCM += TestROCm_SubView_c09.o
+#        OBJ_ROCM += TestROCm_SubView_c10.o
+#        OBJ_ROCM += TestROCm_SubView_c11.o
+#        OBJ_ROCM += TestROCm_SubView_c12.o
+# all of the above use UVM or Host accessible memory
+#        OBJ_ROCM += TestROCm_Team.o
+# compile fails
+#        OBJ_ROCM += TestROCm_TeamReductionScan.o
+# compile fails
+#        OBJ_ROCM += TestROCm_TeamScratch.o
+# compile fails
+        OBJ_ROCM += TestROCm_ViewAPI_b.o
+#  test fail in view_api
+        OBJ_ROCM += TestROCm_ViewMapping_a.o
+        OBJ_ROCM += TestROCm_ViewMapping_b.o
+        OBJ_ROCM += TestROCm_ViewMapping_subview.o
+        OBJ_ROCM += TestROCmHostPinned_ViewAPI.o
+        OBJ_ROCM += TestROCmHostPinned_ViewMapping_a.o
+        OBJ_ROCM += TestROCmHostPinned_ViewMapping_b.o
+        OBJ_ROCM += TestROCmHostPinned_ViewMapping_subview.o
+        OBJ_ROCM += TestROCm_ViewOfClass.o
+        OBJ_ROCM += TestROCm_Spaces.o
+
+        TARGETS += KokkosCore_UnitTest_ROCm
+        TEST_TARGETS += test-rocm
+endif # KOKKOS_INTERNAL_USE_ROCM
+
 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
 
 	OBJ_THREADS = UnitTestMainInit.o gtest-all.o
@@ -122,6 +177,7 @@ endif
 	OBJ_OPENMP += TestOpenMP_TeamReductionScan.o
 	OBJ_OPENMP += TestOpenMP_Other.o
 	OBJ_OPENMP += TestOpenMP_MDRange.o
+	OBJ_OPENMP += TestOpenMP_Crs.o
 	OBJ_OPENMP += TestOpenMP_Task.o TestOpenMP_WorkGraph.o
 	OBJ_OPENMP += TestOpenMP_UniqueToken.o
 	
@@ -209,7 +265,11 @@ endif
         OBJ_SERIAL += TestSerial_Team.o TestSerial_TeamScratch.o
         OBJ_SERIAL += TestSerial_TeamReductionScan.o
         OBJ_SERIAL += TestSerial_Other.o
+        #HCC_WORKAROUND
+        ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
         OBJ_SERIAL += TestSerial_MDRange.o
+        endif
+        OBJ_SERIAL += TestSerial_Crs.o
         OBJ_SERIAL += TestSerial_Task.o TestSerial_WorkGraph.o
 	
 	TARGETS += KokkosCore_UnitTest_Serial
@@ -223,8 +283,10 @@ TEST_TARGETS += test-hwloc
 
 OBJ_DEFAULT = UnitTestMainInit.o gtest-all.o
 ifneq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
+ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
   OBJ_DEFAULT += TestDefaultDeviceType.o TestDefaultDeviceType_a.o TestDefaultDeviceType_b.o TestDefaultDeviceType_c.o TestDefaultDeviceType_d.o
 endif
+endif
 
 TARGETS += KokkosCore_UnitTest_Default
 TEST_TARGETS += test-default
@@ -239,6 +301,9 @@ TEST_TARGETS += ${INITTESTS_TEST_TARGETS}
 KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Cuda
 
+KokkosCore_UnitTest_ROCm: $(OBJ_ROCM) $(KOKKOS_LINK_DEPENDS)
+	$(LINK) $(EXTRA_PATH) $(OBJ_ROCM) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_ROCm
+
 KokkosCore_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Threads
 
@@ -272,6 +337,9 @@ ${INITTESTS_TARGETS}: KokkosCore_UnitTest_DefaultDeviceTypeInit_%: TestDefaultDe
 test-cuda: KokkosCore_UnitTest_Cuda
 	./KokkosCore_UnitTest_Cuda
 
+test-rocm: KokkosCore_UnitTest_ROCm
+	./KokkosCore_UnitTest_ROCm
+
 test-threads: KokkosCore_UnitTest_Threads
 	./KokkosCore_UnitTest_Threads
 
diff --git a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp
index fddcc4a2e6ce76925b68caf7908e26f0efffa45d..f0391134ba9c772546c62434e324cb0749b952e0 100644
--- a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp
+++ b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp
@@ -43,6 +43,19 @@
 
 #include <Kokkos_Core.hpp>
 
+#if defined(KOKKOS_ENABLE_CUDA) && \
+    ( !defined(KOKKOS_ENABLE_CUDA_LAMBDA) || \
+      (  ( defined(KOKKOS_ENABLE_SERIAL) || defined(KOKKOS_ENABLE_OPENMP) ) && \
+         (  (CUDA_VERSION < 8000) && defined( __NVCC__ ))))
+  #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
+    #error "Macro bug: KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA shouldn't be defined"
+  #endif
+#else
+  #if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
+    #error "Macro bug: KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA should be defined"
+  #endif
+#endif
+
 #define KOKKOS_PRAGMA_UNROLL(a)
 
 namespace TestCompilerMacros {
diff --git a/lib/kokkos/core/unit_test/TestComplex.hpp b/lib/kokkos/core/unit_test/TestComplex.hpp
index 36f05612e0972a043af9622801bc1855c58cc62b..ce5537fed362a43eac1b57c8f63a06d7329c1ff4 100644
--- a/lib/kokkos/core/unit_test/TestComplex.hpp
+++ b/lib/kokkos/core/unit_test/TestComplex.hpp
@@ -158,7 +158,8 @@ struct TestComplexBasicMath {
     d_results(1) = a-b;
     d_results(2) = a*b;
     d_results(3) = a/b;
-    d_results(4) = Kokkos::complex<double>(1.0,2.0);
+    d_results(4).real(1.0);
+    d_results(4).imag(2.0);
     d_results(4) += a;
     d_results(5) = Kokkos::complex<double>(1.0,2.0);
     d_results(5) -= a;
@@ -173,7 +174,8 @@ struct TestComplexBasicMath {
     d_results(9) = a-c;
     d_results(10) = a*c;
     d_results(11) = a/c;
-    d_results(12) = Kokkos::complex<double>(1.0,2.0);
+    d_results(12).real(1.0);
+    d_results(12).imag(2.0);
     d_results(12) += c;
     d_results(13) = Kokkos::complex<double>(1.0,2.0);
     d_results(13) -= c;
diff --git a/lib/kokkos/core/unit_test/TestCrs.hpp b/lib/kokkos/core/unit_test/TestCrs.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..90f40368688c4ab9902cbd246b0fb3ea1acc9adf
--- /dev/null
+++ b/lib/kokkos/core/unit_test/TestCrs.hpp
@@ -0,0 +1,98 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <vector>
+
+#include <Kokkos_Core.hpp>
+
+namespace Test {
+
+namespace {
+
+template< class ExecSpace >
+struct CountFillFunctor {  // Functor passed to count_and_fill_crs: reports a row's entry count and optionally writes the entries.
+  KOKKOS_INLINE_FUNCTION
+  std::int32_t operator()(std::int32_t row, std::int32_t* fill) const {
+    auto n = (row % 4) + 1;  // entry count for this row; 1..4 for non-negative row
+    if (fill) {  // a null fill pointer skips the writes, so only the count is returned
+      for (std::int32_t j = 0; j < n; ++j) {
+        fill[j] = j + 1;  // entries written are 1, 2, ..., n
+      }
+    }
+    return n;
+  }
+};
+
+template< class ExecSpace >
+void test_count_fill(std::int32_t nrows) {  // Builds a Crs of nrows rows with CountFillFunctor and verifies it through mirror views.
+  Kokkos::Experimental::Crs<std::int32_t, ExecSpace, void, std::int32_t> graph;
+  Kokkos::Experimental::count_and_fill_crs(graph, nrows, CountFillFunctor<ExecSpace>());
+  ASSERT_EQ(graph.numRows(), nrows);
+  auto row_map = Kokkos::create_mirror_view(graph.row_map);  // mirror of the row offsets, filled by the deep_copy below
+  Kokkos::deep_copy(row_map, graph.row_map);
+  auto entries = Kokkos::create_mirror_view(graph.entries);  // mirror of the entry values, filled by the deep_copy below
+  Kokkos::deep_copy(entries, graph.entries);
+  for (std::int32_t row = 0; row < nrows; ++row) {
+    auto n = (row % 4) + 1;  // same per-row count that CountFillFunctor returns
+    ASSERT_EQ(row_map(row + 1) - row_map(row), n);  // consecutive offsets must differ by the row length
+    for (std::int32_t j = 0; j < n; ++j) {
+      ASSERT_EQ(entries(row_map(row) + j), j + 1);  // the row's entries must be 1..n in order
+    }
+  }
+}
+
+} // anonymous namespace
+
+TEST_F( TEST_CATEGORY, crs_count_fill )  // runs test_count_fill over a spread of row counts
+{
+  test_count_fill<TEST_EXECSPACE>(0);  // empty graph
+  test_count_fill<TEST_EXECSPACE>(1);
+  test_count_fill<TEST_EXECSPACE>(2);
+  test_count_fill<TEST_EXECSPACE>(3);
+  test_count_fill<TEST_EXECSPACE>(13);  // not a multiple of the functor's period of 4
+  test_count_fill<TEST_EXECSPACE>(100);
+  test_count_fill<TEST_EXECSPACE>(1000);
+  test_count_fill<TEST_EXECSPACE>(10000);
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/TestRange.hpp b/lib/kokkos/core/unit_test/TestRange.hpp
index 3cea1ad4a00e5476df6fbd5c20518c5426884cf6..e6857a4d2f6937be1489ba3b54ed32524e520779 100644
--- a/lib/kokkos/core/unit_test/TestRange.hpp
+++ b/lib/kokkos/core/unit_test/TestRange.hpp
@@ -301,19 +301,19 @@ TEST_F( TEST_CATEGORY, range_scan )
 {
   { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(0); f.test_scan(); }
   { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(0); f.test_scan(); }
-#ifndef KOKKOS_ENABLE_CUDA
+#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_ROCM)
   { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(0); f.test_dynamic_policy(); }
 #endif
 
   { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(2); f.test_scan(); }
   { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(3); f.test_scan(); }
-#ifndef KOKKOS_ENABLE_CUDA
+#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_ROCM)
   { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(3); f.test_dynamic_policy(); }
 #endif
 
   { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(1000); f.test_scan(); }
   { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(1001); f.test_scan(); }
-#ifndef KOKKOS_ENABLE_CUDA
+#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_ROCM)
   { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(1001); f.test_dynamic_policy(); }
 #endif
 }
diff --git a/lib/kokkos/core/unit_test/TestScan.hpp b/lib/kokkos/core/unit_test/TestScan.hpp
index fa7669c5edbb87a6850dc8191cf93a2629949080..823f0c99a81c3bd80b6b9c0d49983c13d30663b2 100644
--- a/lib/kokkos/core/unit_test/TestScan.hpp
+++ b/lib/kokkos/core/unit_test/TestScan.hpp
@@ -92,7 +92,7 @@ struct TestScan {
     Kokkos::deep_copy( errors_a, 0 );
     errors = errors_a;
 
-    parallel_scan( N , *this );
+    Kokkos::parallel_scan( N , *this );
   }
 
   TestScan( const WorkSpec & Start , const WorkSpec & N )
@@ -103,7 +103,7 @@ struct TestScan {
     Kokkos::deep_copy( errors_a, 0 );
     errors = errors_a;
 
-    parallel_scan( exec_policy( Start , N ) , *this );
+    Kokkos::parallel_scan( exec_policy( Start , N ) , *this );
   }
 
   static void test_range( const WorkSpec & begin, const WorkSpec & end )
diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp
index 4e6654385792b1b08c34e97c4e4011f816cebbf1..a3f59a2b9e77203eb008fe9b402b0e1deada9df0 100644
--- a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp
+++ b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp
@@ -250,34 +250,23 @@ struct TestTaskDependence {
     const int n = CHUNK < m_count ? CHUNK : m_count;
 
     if ( 1 < m_count ) {
-      // Test use of memory pool for temporary allocation:
 
-      // Raw allocation:
-      future_type * const f =
-        (future_type *) m_sched.memory()->allocate( sizeof(future_type) * n );
+      const int increment = ( m_count + n - 1 ) / n;
 
-      // In-place construction:
-      for ( int i = 0; i < n; ++i ) new(f+i) future_type();
+      future_type f =
+        m_sched.when_all( n , [this,increment]( int i ) {
+          const long inc   = increment ;
+          const long begin = i * inc ;
+          const long count = begin + inc < m_count ? inc : m_count - begin ;
 
-      const int inc = ( m_count + n - 1 ) / n;
-
-      for ( int i = 0; i < n; ++i ) {
-        long begin = i * inc;
-        long count = begin + inc < m_count ? inc : m_count - begin;
-
-        f[i] = Kokkos::task_spawn( Kokkos::TaskSingle( m_sched )
-                                 , TestTaskDependence( count, m_sched, m_accum ) );
-      }
+          return Kokkos::task_spawn
+            ( Kokkos::TaskSingle( m_sched )
+            , TestTaskDependence( count, m_sched, m_accum ) );
+        });
 
       m_count = 0;
 
-      Kokkos::respawn( this, Kokkos::when_all( f, n ) );
-
-      // In-place destruction to release future:
-      for ( int i = 0; i < n; ++i ) (f+i)->~future_type();
-
-      // Raw deallocation:
-      m_sched.memory()->deallocate( f , sizeof(future_type) * n );
+      Kokkos::respawn( this, f );
     }
     else if ( 1 == m_count ) {
       Kokkos::atomic_increment( & m_accum() );
@@ -372,7 +361,9 @@ struct TestTaskTeam {
                                                  , begin - 1 )
                                    );
 
+        #ifndef __HCC_ACCELERATOR__
         assert( !future.is_null() );
+        #endif
 
         Kokkos::respawn( this, future );
       }
@@ -664,6 +655,7 @@ TEST_F( TEST_CATEGORY, task_fib )
 TEST_F( TEST_CATEGORY, task_depend )
 {
   for ( int i = 0; i < 25; ++i ) {
+printf("\nTest::task_depend %d\n",i);
     TestTaskScheduler::TestTaskDependence< TEST_EXECSPACE >::run( i );
   }
 }
diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp
index 232163f11e1f86b13ce986837c5d17ffa4431460..721ffd8378d7d8bc24d92c41689c332520e5af5f 100644
--- a/lib/kokkos/core/unit_test/TestViewAPI.hpp
+++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp
@@ -1324,10 +1324,14 @@ TEST_F( TEST_CATEGORY, view_remap )
   #ifdef KOKKOS_ENABLE_CUDA
     #define EXECSPACE std::conditional<std::is_same<TEST_EXECSPACE,Kokkos::Cuda>::value,Kokkos::CudaHostPinnedSpace,TEST_EXECSPACE>::type
   #else
-    #ifdef KOKKOS_ENABLE_OPENMPTARGET
-      #define EXECSPACE Kokkos::HostSpace
+    #ifdef KOKKOS_ENABLE_ROCM
+      #define EXECSPACE std::conditional<std::is_same<TEST_EXECSPACE,Kokkos::Experimental::ROCm>::value,Kokkos::Experimental::ROCmHostPinnedSpace,TEST_EXECSPACE>::type
     #else
-      #define EXECSPACE TEST_EXECSPACE
+      #if defined(KOKKOS_ENABLE_OPENMPTARGET)
+        #define EXECSPACE Kokkos::HostSpace
+      #else
+        #define EXECSPACE TEST_EXECSPACE
+      #endif
     #endif
   #endif
 
@@ -1375,4 +1379,14 @@ TEST_F( TEST_CATEGORY, view_remap )
   }
 }
 
+TEST_F( TEST_CATEGORY, view_mirror_nonconst )  // mirrors created from a const-valued view are used as deep_copy destinations
+{  // no assertions: the test passes if the statements below compile and run
+  Kokkos::View<int*, TEST_EXECSPACE> d_view("d_view", 10);
+  Kokkos::View<const int*, TEST_EXECSPACE> d_view_const = d_view;  // const-valued view assigned from d_view
+  auto h_view = Kokkos::create_mirror(d_view_const);  // overload without an explicit space argument
+  Kokkos::deep_copy(h_view, d_view_const);
+  auto h_view2 = Kokkos::create_mirror(Kokkos::HostSpace(), d_view_const);  // overload taking an explicit HostSpace
+  Kokkos::deep_copy(h_view2, d_view_const);
+}
+
 } // namespace Test
diff --git a/lib/kokkos/core/unit_test/TestViewCtorPropEmbeddedDim.hpp b/lib/kokkos/core/unit_test/TestViewCtorPropEmbeddedDim.hpp
index 305ddb2a1d09f8535f42b456ca7e9ff22ce60a34..61b43a588ae4badda6c19680d2a3d4882fea649c 100644
--- a/lib/kokkos/core/unit_test/TestViewCtorPropEmbeddedDim.hpp
+++ b/lib/kokkos/core/unit_test/TestViewCtorPropEmbeddedDim.hpp
@@ -107,6 +107,7 @@ struct TestViewCtorProp_EmbeddedDim {
         Kokkos::deep_copy( hcv1, cv1 );
 
         ASSERT_EQ( (std::is_same< CommonViewValueType, double >::value) , true ) ;
+        ASSERT_EQ( (std::is_same< typename decltype(view_alloc_arg)::scalar_array_type, CommonViewValueType>::value) , true ) ;
       #if 0
       // debug output
       for ( int i = 0; i < N0*N1; ++i ) {
diff --git a/lib/kokkos/core/unit_test/TestViewMapping_a.hpp b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp
index 810ae72e7367b4d30a62736756e1cf88c2827509..f963875ae1b67ce441c4b963ea20ad646f0452bc 100644
--- a/lib/kokkos/core/unit_test/TestViewMapping_a.hpp
+++ b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp
@@ -1012,7 +1012,7 @@ void test_view_mapping()
     ASSERT_EQ( a.use_count(), 1 );
     ASSERT_EQ( b.use_count(), 0 );
 
-#if !defined( KOKKOS_ENABLE_CUDA_LAMBDA )
+#if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) && !defined( KOKKOS_ENABLE_ROCM )
     // Cannot launch host lambda when CUDA lambda is enabled.
 
     typedef typename Kokkos::Impl::HostMirror< Space >::Space::execution_space host_exec_space;
@@ -1021,6 +1021,7 @@ void test_view_mapping()
       // 'a' is captured by copy, and the capture mechanism converts 'a' to an
       // unmanaged copy.  When the parallel dispatch accepts a move for the
       // lambda, this count should become 1.
+ 
       ASSERT_EQ( a.use_count(), 2 );
       V x = a;
       ASSERT_EQ( a.use_count(), 2 );
diff --git a/lib/kokkos/core/unit_test/TestViewMapping_b.hpp b/lib/kokkos/core/unit_test/TestViewMapping_b.hpp
index ee1c96b423e15185d3c6e4f33c9d4f9ad8e55709..d0cbfe9e7d4449f6897798d1cd09d714603c96dc 100644
--- a/lib/kokkos/core/unit_test/TestViewMapping_b.hpp
+++ b/lib/kokkos/core/unit_test/TestViewMapping_b.hpp
@@ -133,11 +133,15 @@ TEST_F( TEST_CATEGORY , view_mapping_atomic )
   f.run();
 }
 
+}
+
 /*--------------------------------------------------------------------------*/
+
 namespace Test {
-struct ValueType {
+
+struct MappingClassValueType {
     KOKKOS_INLINE_FUNCTION
-    ValueType() 
+    MappingClassValueType() 
     {
 #if 0
 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
@@ -150,7 +154,7 @@ struct ValueType {
 #endif
     }
     KOKKOS_INLINE_FUNCTION
-    ~ValueType()
+    ~MappingClassValueType()
     {
 #if 0
 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
@@ -163,7 +167,6 @@ struct ValueType {
 #endif
     }
   };
-}
 
 template< class Space >
 void test_view_mapping_class_value()
@@ -172,7 +175,7 @@ void test_view_mapping_class_value()
 
   ExecSpace::fence();
   {
-    Kokkos::View< Test::ValueType, ExecSpace > a( "a" );
+    Kokkos::View< MappingClassValueType, ExecSpace > a( "a" );
     ExecSpace::fence();
   }
   ExecSpace::fence();
@@ -184,3 +187,74 @@ TEST_F( TEST_CATEGORY , view_mapping_class_value )
 }
 
 }
+
+/*--------------------------------------------------------------------------*/
+
+namespace Test {
+
+TEST_F( TEST_CATEGORY , view_mapping_assignable )
+{
+  typedef TEST_EXECSPACE exec_space ;
+
+  { // Assignment of rank-0 Left = Right
+    typedef Kokkos::ViewTraits<int,Kokkos::LayoutLeft, exec_space> dst_traits ;
+    typedef Kokkos::ViewTraits<int,Kokkos::LayoutRight,exec_space> src_traits ;
+    typedef Kokkos::Impl::ViewMapping<dst_traits,src_traits,void> mapping ;
+    static_assert( mapping::is_assignable , "" );
+
+    Kokkos::View<int,Kokkos::LayoutRight,exec_space> src ;
+    Kokkos::View<int,Kokkos::LayoutLeft,exec_space> dst( src );
+    dst = src ;
+  }
+
+  { // Assignment of rank-0 Right = Left
+    typedef Kokkos::ViewTraits<int,Kokkos::LayoutRight,exec_space> dst_traits ;
+    typedef Kokkos::ViewTraits<int,Kokkos::LayoutLeft, exec_space> src_traits ;
+    typedef Kokkos::Impl::ViewMapping<dst_traits,src_traits,void> mapping ;
+    static_assert( mapping::is_assignable , "" );
+
+    Kokkos::View<int,Kokkos::LayoutLeft,exec_space> src ;
+    Kokkos::View<int,Kokkos::LayoutRight,exec_space> dst( src );
+    dst = src ;
+  }
+
+  { // Assignment of rank-1 Left = Right
+    typedef Kokkos::ViewTraits<int*,Kokkos::LayoutLeft, exec_space> dst_traits ;
+    typedef Kokkos::ViewTraits<int*,Kokkos::LayoutRight,exec_space> src_traits ;
+    typedef Kokkos::Impl::ViewMapping<dst_traits,src_traits,void> mapping ;
+    static_assert( mapping::is_assignable , "" );
+
+    Kokkos::View<int*,Kokkos::LayoutRight,exec_space> src ;
+    Kokkos::View<int*,Kokkos::LayoutLeft,exec_space> dst( src );
+    dst = src ;
+  }
+
+  { // Assignment of rank-1 Right = Left
+    typedef Kokkos::ViewTraits<int*,Kokkos::LayoutRight,exec_space> dst_traits ;
+    typedef Kokkos::ViewTraits<int*,Kokkos::LayoutLeft, exec_space> src_traits ;
+    typedef Kokkos::Impl::ViewMapping<dst_traits,src_traits,void> mapping ;
+    static_assert( mapping::is_assignable , "" );
+
+    Kokkos::View<int*,Kokkos::LayoutLeft,exec_space> src ;
+    Kokkos::View<int*,Kokkos::LayoutRight,exec_space> dst( src );
+    dst = src ;
+  }
+
+  { // Assignment of rank-2 Left = Right
+    typedef Kokkos::ViewTraits<int**,Kokkos::LayoutLeft, exec_space> dst_traits ;
+    typedef Kokkos::ViewTraits<int**,Kokkos::LayoutRight,exec_space> src_traits ;
+    typedef Kokkos::Impl::ViewMapping<dst_traits,src_traits,void> mapping ;
+    static_assert( ! mapping::is_assignable , "" );
+  }
+
+  { // Assignment of rank-2 Right = Left
+    typedef Kokkos::ViewTraits<int**,Kokkos::LayoutRight,exec_space> dst_traits ;
+    typedef Kokkos::ViewTraits<int**,Kokkos::LayoutLeft, exec_space> src_traits ;
+    typedef Kokkos::Impl::ViewMapping<dst_traits,src_traits,void> mapping ;
+    static_assert( ! mapping::is_assignable , "" );
+  }
+
+}
+
+}
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Crs.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Crs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a90e88933e12e5b9e0ac899e401a9958ab7d0d8d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Crs.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cuda/TestCuda_Category.hpp>
+#include <TestCrs.hpp>
diff --git a/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_c.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_c.cpp
index e11996e8f90111b1fc2cd95727842eeb42466826..4500392b27081a7f80c5da98cd44204ac1086f23 100644
--- a/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_c.cpp
+++ b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_c.cpp
@@ -46,6 +46,7 @@
 #include <Kokkos_Core.hpp>
 
 #if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ )
+#if !defined( KOKKOS_ENABLE_ROCM ) 
 
 #include <default/TestDefaultDeviceType_Category.hpp>
 #include <TestReduceCombinatorical.hpp>
@@ -60,3 +61,4 @@ TEST_F( defaultdevicetype, reduce_instantiation_c )
 } // namespace Test
 
 #endif
+#endif
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Crs.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Crs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..54b283f5390a22dc5f4a90acee3129ef34170f5c
--- /dev/null
+++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Crs.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <openmp/TestOpenMP_Category.hpp>
+#include <TestCrs.hpp>
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_Category.hpp b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_Category.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..94e778b3bcd086fa9649d3326905f587dde62e66
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_Category.hpp
@@ -0,0 +1,65 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_TEST_ROCMHOSTPINNED_HPP // guard named after this header so it cannot clash with another category header
+#define KOKKOS_TEST_ROCMHOSTPINNED_HPP
+
+#include <gtest/gtest.h>
+
+namespace Test {
+
+class rocm_hostpinned : public ::testing::Test { // gtest fixture; its name is what TEST_CATEGORY expands to below
+protected:
+  static void SetUpTestCase() { // no-op: nothing is set up here
+  }
+
+  static void TearDownTestCase() { // no-op: nothing is torn down here
+  }
+};
+
+} // namespace Test
+
+#define TEST_CATEGORY rocm_hostpinned
+#define TEST_EXECSPACE Kokkos::Experimental::ROCmHostPinnedSpace
+
+#endif // KOKKOS_TEST_ROCMHOSTPINNED_HPP
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_SharedAlloc.cpp b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_SharedAlloc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2f8f379db02943e5b7bf1bab70e2ea3a2fe01e90
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_SharedAlloc.cpp
@@ -0,0 +1,55 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmHostPinned_Category.hpp>
+#include <TestSharedAlloc.hpp>
+
+namespace Test {
+
+
+TEST_F( TEST_CATEGORY, impl_shared_alloc ) // test "impl_shared_alloc" registered on the TEST_CATEGORY fixture
+{
+  test_shared_alloc< TEST_EXECSPACE, Kokkos::DefaultHostExecutionSpace >(); // template args: TEST_EXECSPACE from the included category header, plus the default host execution space; helper presumably comes from TestSharedAlloc.hpp -- confirm
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewAPI.cpp b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewAPI.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..32ecbbb48f564cbc4eb145c57e503fb2c740b5eb
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewAPI.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmHostPinned_Category.hpp>
+#include <TestViewAPI.hpp>
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewMapping_a.cpp b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewMapping_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5523fac7fc6443cfcf70db74a286a4a2497f707c
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewMapping_a.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmHostPinned_Category.hpp>
+#include <TestViewMapping_a.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewMapping_b.cpp b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewMapping_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5e29612d72440e016a960fd6ad4d714ee5f4131b
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewMapping_b.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmHostPinned_Category.hpp>
+#include <TestViewMapping_b.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewMapping_subview.cpp b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewMapping_subview.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6a6194b38f66f5b26dfd3f8d43c166fefd1ff357
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewMapping_subview.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmHostPinned_Category.hpp>
+#include <TestViewMapping_subview.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_All.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_All.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a9c7e51b6230104eb8419980c93b79149a6881d6
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_All.cpp
@@ -0,0 +1,33 @@
+#include "rocm/TestROCm_Init.cpp"
+
+//#include "rocm/TestROCm_Complex.cpp"
+#include "rocm/TestROCm_Reductions.cpp"
+//#include "rocm/TestROCm_RangePolicy.cpp"
+//#include "rocm/TestROCm_AtomicOperations.cpp"
+//#include "rocm/TestROCm_Atomics.cpp"
+//#include "rocm/TestROCm_AtomicViews.cpp"
+//#include "rocm/TestROCm_Other.cpp"
+//#include "rocm/TestROCm_Scan.cpp"
+//#include "rocm/TestROCm_SharedAlloc.cpp"
+//#include "rocm/TestROCm_SubView_a.cpp"
+//#include "rocm/TestROCm_SubView_b.cpp"
+//#include "rocm/TestROCm_SubView_c01.cpp"
+//#include "rocm/TestROCm_SubView_c02.cpp"
+//#include "rocm/TestROCm_SubView_c03.cpp"
+//#include "rocm/TestROCm_SubView_c04.cpp"
+//#include "rocm/TestROCm_SubView_c05.cpp"
+//#include "rocm/TestROCm_SubView_c06.cpp"
+//#include "rocm/TestROCm_SubView_c07.cpp"
+//#include "rocm/TestROCm_SubView_c08.cpp"
+//#include "rocm/TestROCm_SubView_c09.cpp"
+//#include "rocm/TestROCm_SubView_c10.cpp"
+//#include "rocm/TestROCm_SubView_c11.cpp"
+//#include "rocm/TestROCm_SubView_c12.cpp"
+//#include "rocm/TestROCm_Team.cpp"
+//#include "rocm/TestROCm_TeamReductionScan.cpp"
+//#include "rocm/TestROCm_TeamScratch.cpp"
+//#include "rocm/TestROCm_ViewAPI_b.cpp"
+//#include "rocm/TestROCm_ViewMapping_a.cpp"
+//#include "rocm/TestROCm_ViewMapping_b.cpp"
+//#include "rocm/TestROCm_ViewMapping_subview.cpp"
+//#include "rocm/TestROCm_ViewOfClass.cpp"
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_AtomicOperations.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_AtomicOperations.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e6b7a2531677e5451308642e2854fb44e2e8ef82
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_AtomicOperations.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestAtomicOperations.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_AtomicViews.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_AtomicViews.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d5f82826b08c499f857fd807ab65ee8026aaf9f5
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_AtomicViews.cpp
@@ -0,0 +1,47 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestAtomicViews.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_Atomics.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_Atomics.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5944830c43f1253f747e83cbf2e68aab5ee09446
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_Atomics.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestAtomic.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_Category.hpp b/lib/kokkos/core/unit_test/rocm/TestROCm_Category.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a34068f533cc11290f72f2a1e99043ef3cf4a9f2
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_Category.hpp
@@ -0,0 +1,65 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_TEST_ROCM_HPP
+#define KOKKOS_TEST_ROCM_HPP
+
+#include <gtest/gtest.h>
+
+namespace Test {
+
+class rocm : public ::testing::Test { // gtest fixture; its name is what TEST_CATEGORY expands to below
+protected:
+  static void SetUpTestCase() { // no-op: nothing is set up here
+  }
+
+  static void TearDownTestCase() { // no-op: nothing is torn down here
+  }
+};
+
+} // namespace Test
+
+#define TEST_CATEGORY rocm
+#define TEST_EXECSPACE Kokkos::Experimental::ROCm
+
+#endif // KOKKOS_TEST_ROCM_HPP
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_Complex.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_Complex.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2b72cdee88cbdfdd5818acb88192d675ff0addc2
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_Complex.cpp
@@ -0,0 +1,47 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestComplex.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_Init.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_Init.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dafe9fb52914ef2999286732493e4c1192d91505
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_Init.cpp
@@ -0,0 +1,50 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestInit.hpp>
+#include<TestCompilerMacros.hpp>
+#include<TestPolicyConstruction.hpp>
+
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_Other.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_Other.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3e182dacbad413dc5dd18043d4337ae782e0d74f
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_Other.cpp
@@ -0,0 +1,52 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestTemplateMetaFunctions.hpp>
+#include<TestAggregate.hpp>
+//#include<TestMemoryPool.hpp>  // left disabled here; TODO(review): confirm whether ROCm should run it
+#include<TestCXX11.hpp>
+#include<TestTile.hpp>
+
+#include<TestViewCtorPropEmbeddedDim.hpp>
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_RangePolicy.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_RangePolicy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef7dad95a3afd5481258368ae5f7ae4d9f171c77
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_RangePolicy.cpp
@@ -0,0 +1,47 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestRange.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_Reductions.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_Reductions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..33c4d960f865d14d28355eba053c7f4a1c8dbf90
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_Reductions.cpp
@@ -0,0 +1,48 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestFunctorAnalysis.hpp>
+#include <TestReduce.hpp>
+#include <TestCXX11Deduction.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_Scan.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_Scan.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ae0a016af3d285f17da0cf71200f3410c2428bf6
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_Scan.cpp
@@ -0,0 +1,47 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestScan.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SharedAlloc.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SharedAlloc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7a038edf4b2a330a2f8c71037cbb56acff85eca6
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SharedAlloc.cpp
@@ -0,0 +1,55 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestSharedAlloc.hpp>
+
+namespace Test {
+
+// Instantiates test_shared_alloc with ROCmSpace and the default host execution space.
+TEST_F( TEST_CATEGORY, impl_shared_alloc )
+{
+  test_shared_alloc< Kokkos::Experimental::ROCmSpace, Kokkos::DefaultHostExecutionSpace >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_Spaces.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_Spaces.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d44e7afec385bb08385d4a7a178593b3e7d8a1b2
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_Spaces.cpp
@@ -0,0 +1,196 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core.hpp>
+#include <rocm/TestROCm_Category.hpp>
+
+namespace Test {
+
+// Calls Kokkos::abort with the message "test_abort".
+KOKKOS_INLINE_FUNCTION void test_abort()
+{
+  Kokkos::abort( "test_abort" );
+}
+
+// When *ptr holds 42, overwrite it with 2 * 42 (84); any other value is left as-is.
+KOKKOS_INLINE_FUNCTION void test_rocm_spaces_int_value( int * ptr )
+{
+  if ( 42 == *ptr ) { *ptr = 84; }
+}
+
+TEST_F( rocm, space_access )  // compile-time checks only: the body is all static_asserts
+{ //----- MemorySpaceAccess< HostSpace, ... > -----
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::HostSpace >::assignable, "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace >::assignable, "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::Experimental::ROCmSpace >::assignable, "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::Experimental::ROCmSpace >::accessible, "" );
+
+  //----- MemorySpaceAccess< ROCmSpace, ... > -----
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCmSpace >::assignable, "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCmHostPinnedSpace >::assignable, "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCmHostPinnedSpace >::accessible, "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmSpace, Kokkos::HostSpace >::assignable, "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmSpace, Kokkos::HostSpace >::accessible, "" );
+
+  //----- MemorySpaceAccess< ROCmHostPinnedSpace, ... > -----
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace, Kokkos::Experimental::ROCmHostPinnedSpace >::assignable, "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace, Kokkos::HostSpace >::assignable, "" );
+
+  static_assert(
+    Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace, Kokkos::HostSpace >::accessible, "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace, Kokkos::Experimental::ROCmSpace >::assignable, "" );
+
+  static_assert(
+    ! Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace, Kokkos::Experimental::ROCmSpace >::accessible, "" );
+
+  //----- SpaceAccessibility for ROCm and for HostSpace -----
+
+  static_assert(
+    ! Kokkos::Impl::SpaceAccessibility< Kokkos::Experimental::ROCm, Kokkos::HostSpace >::accessible, "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< Kokkos::Experimental::ROCm, Kokkos::Experimental::ROCmSpace >::accessible, "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< Kokkos::Experimental::ROCm, Kokkos::Experimental::ROCmHostPinnedSpace >::accessible, "" );
+
+  static_assert(
+    ! Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, Kokkos::Experimental::ROCmSpace >::accessible, "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace >::accessible, "" );
+  //----- HostMirror< ... >::Space names the expected type -----
+  static_assert(
+    std::is_same< Kokkos::Impl::HostMirror< Kokkos::Experimental::ROCmSpace >::Space
+                , Kokkos::HostSpace >::value, "" );
+
+  static_assert(
+    std::is_same< Kokkos::Impl::HostMirror< Kokkos::Experimental::ROCmHostPinnedSpace >::Space
+                , Kokkos::Experimental::ROCmHostPinnedSpace >::value, "" );
+  //----- SpaceAccessibility< HostMirror< ... >::Space, HostSpace > is accessible -----
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility
+      < Kokkos::Impl::HostMirror< Kokkos::Experimental::ROCm >::Space
+      , Kokkos::HostSpace
+      >::accessible, "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility
+      < Kokkos::Impl::HostMirror< Kokkos::Experimental::ROCmSpace >::Space
+      , Kokkos::HostSpace
+      >::accessible, "" );
+
+  static_assert(
+    Kokkos::Impl::SpaceAccessibility
+      < Kokkos::Impl::HostMirror< Kokkos::Experimental::ROCmHostPinnedSpace >::Space
+      , Kokkos::HostSpace
+      >::accessible, "" );
+}
+
+// Fills a View allocated in MemSpace, then counts wrong entries from ExecSpace.
+template< class MemSpace, class ExecSpace >
+struct TestViewROCmAccessible {
+  enum { N = 1000 };
+  using V = Kokkos::View< double*, MemSpace >;
+  V m_base;
+
+  struct TagInit {};
+  struct TagTest {};
+
+  TestViewROCmAccessible() : m_base( "base", N ) {}
+
+  // TagInit: store idx + 1 at index idx.
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const TagInit &, const int idx ) const { m_base[idx] = idx + 1; }
+
+  // TagTest: add one to errors when index idx does not hold idx + 1.
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const TagTest &, const int idx, long & errors ) const
+  { errors += ( m_base[idx] != idx + 1 ) ? 1 : 0; }
+
+  static void run()
+  {
+    using fill_policy  = Kokkos::RangePolicy< typename MemSpace::execution_space, TagInit >;
+    using check_policy = Kokkos::RangePolicy< ExecSpace, TagTest >;
+    TestViewROCmAccessible functor;
+    Kokkos::parallel_for( fill_policy( 0, N ), functor );
+    MemSpace::execution_space::fence();
+    // The check may run in another execution space, so the fill had to finish first.
+    long error_count = -1;
+    Kokkos::parallel_reduce( check_policy( 0, N ), functor, error_count );
+    EXPECT_EQ( error_count, 0 );
+  }
+};
+
+TEST_F( rocm, impl_view_accessible )  // runs TestViewROCmAccessible::run() per memory space
+{
+  TestViewROCmAccessible< Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCm >::run();
+  // Host-pinned memory is checked from both ROCm and the host execution space.
+  TestViewROCmAccessible< Kokkos::Experimental::ROCmHostPinnedSpace, Kokkos::Experimental::ROCm >::run();
+  TestViewROCmAccessible< Kokkos::Experimental::ROCmHostPinnedSpace, Kokkos::HostSpace::execution_space >::run();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_a.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ea39a25b5c755bef4f5421321336aba5d1e6efbd
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_a.cpp
@@ -0,0 +1,104 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Each test forwards to one TestViewSubview helper instantiated with TEST_EXECSPACE.
+TEST_F( TEST_CATEGORY, view_subview_auto_1d_left )
+{
+  TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, TEST_EXECSPACE >();
+}
+
+TEST_F( TEST_CATEGORY, view_subview_auto_1d_right )
+{
+  TestViewSubview::test_auto_1d< Kokkos::LayoutRight, TEST_EXECSPACE >();
+}
+
+TEST_F( TEST_CATEGORY, view_subview_auto_1d_stride )
+{
+  TestViewSubview::test_auto_1d< Kokkos::LayoutStride, TEST_EXECSPACE >();
+}
+
+TEST_F( TEST_CATEGORY, view_subview_assign_strided )
+{
+  TestViewSubview::test_1d_strided_assignment< TEST_EXECSPACE >();
+}
+// test_left_0 through test_left_3.
+TEST_F( TEST_CATEGORY, view_subview_left_0 )
+{
+  TestViewSubview::test_left_0< TEST_EXECSPACE >();
+}
+
+TEST_F( TEST_CATEGORY, view_subview_left_1 )
+{
+  TestViewSubview::test_left_1< TEST_EXECSPACE >();
+}
+
+TEST_F( TEST_CATEGORY, view_subview_left_2 )
+{
+  TestViewSubview::test_left_2< TEST_EXECSPACE >();
+}
+
+TEST_F( TEST_CATEGORY, view_subview_left_3 )
+{
+  TestViewSubview::test_left_3< TEST_EXECSPACE >();
+}
+// test_right_0, test_right_1 and test_right_3.
+TEST_F( TEST_CATEGORY, view_subview_right_0 )
+{
+  TestViewSubview::test_right_0< TEST_EXECSPACE >();
+}
+
+TEST_F( TEST_CATEGORY, view_subview_right_1 )
+{
+  TestViewSubview::test_right_1< TEST_EXECSPACE >();
+}
+
+TEST_F( TEST_CATEGORY, view_subview_right_3 )
+{
+  TestViewSubview::test_right_3< TEST_EXECSPACE >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_b.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..00a3a341bbdaaea802aa89477b25224af30b3bda
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_b.cpp
@@ -0,0 +1,63 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Each test calls its helper three ways: no memory traits, Atomic, then RandomAccess.
+TEST_F( TEST_CATEGORY, view_subview_layoutleft_to_layoutleft )
+{
+  TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE >();
+  TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+TEST_F( TEST_CATEGORY, view_subview_layoutright_to_layoutright )
+{
+  TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE >();
+  TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >();
+  TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c01.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c01.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c17b0722e99f88947d854a759184ab4ff9a913b1
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c01.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_1d_assign on TEST_EXECSPACE with no explicit memory traits (macros presumably come from the category header).
+TEST_F( TEST_CATEGORY, view_subview_1d_assign )
+{
+  TestViewSubview::test_1d_assign< TEST_EXECSPACE >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c02.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c02.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e723b433239a2ac94a541f7d6949b93b0d256bbd
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c02.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_1d_assign on TEST_EXECSPACE with Kokkos::MemoryTraits<Kokkos::Atomic>.
+TEST_F( TEST_CATEGORY, view_subview_1d_assign_atomic )
+{
+  TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c03.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c03.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b1170a70f50ad622330ad3e58e4fff9c27c1215f
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c03.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_1d_assign on TEST_EXECSPACE with Kokkos::MemoryTraits<Kokkos::RandomAccess>.
+TEST_F( TEST_CATEGORY, view_subview_1d_assign_randomaccess )
+{
+  TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c04.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c04.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0788a82ba4d4833a8b51652a22ae92469b5625f7
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c04.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_2d_subview_3d on TEST_EXECSPACE with no explicit memory traits (macros presumably come from the category header).
+TEST_F( TEST_CATEGORY, view_subview_2d_from_3d )
+{
+  TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c05.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c05.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8d075a37c099e887542f311e19f8f0d99d618aa3
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c05.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_2d_subview_3d on TEST_EXECSPACE with Kokkos::MemoryTraits<Kokkos::Atomic>.
+TEST_F( TEST_CATEGORY, view_subview_2d_from_3d_atomic )
+{
+  TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c06.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c06.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b9dc782571a5a00b39a54ddb5090f6de0f36326f
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c06.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_2d_subview_3d on TEST_EXECSPACE with Kokkos::MemoryTraits<Kokkos::RandomAccess>.
+TEST_F( TEST_CATEGORY, view_subview_2d_from_3d_randomaccess )
+{
+  TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c07.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c07.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..54c46095aa9df55bc66451a76be4d4ec862f069a
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c07.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_3d_subview_5d_left on TEST_EXECSPACE with no explicit memory traits (macros presumably come from the category header).
+TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left )
+{
+  TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c08.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c08.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..369e16a795acc12cbe22f5fc5bf22ff82c019c0b
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c08.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_3d_subview_5d_left on TEST_EXECSPACE with Kokkos::MemoryTraits<Kokkos::Atomic>.
+TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_atomic )
+{
+  TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c09.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c09.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b97926f98e8815229f4ed16fac9bc19fbe95eeec
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c09.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_3d_subview_5d_left on TEST_EXECSPACE with Kokkos::MemoryTraits<Kokkos::RandomAccess>.
+TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_randomaccess )
+{
+  TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c10.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c10.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a1d47e0fbdcaed6b064e039289b3cf500c613d98
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c10.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_3d_subview_5d_right on TEST_EXECSPACE with no explicit memory traits (macros presumably come from the category header).
+TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right )
+{
+  TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c11.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c11.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5be70dc22e80c159730aa3134dd3d1b7b01cfa58
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c11.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_3d_subview_5d_right on TEST_EXECSPACE with Kokkos::MemoryTraits<Kokkos::Atomic>.
+TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_atomic )
+{
+  TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c12.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c12.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..81354766623fc7336025493a7a2bca61079d21fa
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_SubView_c12.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmUVM_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+// Calls TestViewSubview::test_3d_subview_5d_right on TEST_EXECSPACE with Kokkos::MemoryTraits<Kokkos::RandomAccess>.
+TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_randomaccess )
+{
+  TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >();
+}
+
+} // namespace Test
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_Team.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_Team.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..054bbd83c830724612cb6c2dd49d9a88a4853241
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_Team.cpp
@@ -0,0 +1,75 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestTeam.hpp>
+
+namespace Test {
+// team_for: calls TestTeamPolicy::test_for with Static and Dynamic schedules for the arguments 0, 2 and 1000.
+TEST_F( TEST_CATEGORY, team_for )
+{
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 );
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 );
+
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 );
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 );
+
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 );
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 );
+}
+// NOTE(review): the integer argument is presumably a problem/league size (0 = empty case) - confirm in TestTeam.hpp.
+// team_reduce: same schedule/argument combinations as team_for, calling TestTeamPolicy::test_reduce instead.
+TEST_F( TEST_CATEGORY, team_reduce )
+{
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 );
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 );
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 );
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 );
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 );
+  TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 );
+}
+} // namespace Test
+
+#include <TestTeamVector.hpp>
+
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ba0eb0e1bd54c8073ac078a5191ff83bd920d593
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp
@@ -0,0 +1,82 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestTeam.hpp>
+
+namespace Test {
+// team_scan is compiled only when KOKKOS_ROCM_CLANG_WORKAROUND is not defined; the two reduce tests below are unconditional.
+#if !defined(KOKKOS_ROCM_CLANG_WORKAROUND)
+TEST_F( TEST_CATEGORY, team_scan )
+{
+  TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10 );
+  TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
+  TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10000 );
+  TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
+}
+#endif // !defined(KOKKOS_ROCM_CLANG_WORKAROUND)
+// team_long_reduce: TestReduceTeam with scalar type long, Static and Dynamic schedules, arguments 0, 3 and 100000.
+TEST_F( TEST_CATEGORY, team_long_reduce )
+{
+  TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+// team_double_reduce: identical call pattern to team_long_reduce, with scalar type double.
+TEST_F( TEST_CATEGORY, team_double_reduce )
+{
+  TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 );
+  TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 );
+  TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..351dfee2b5cd2b23cab117738a8fb8bd8dfc054d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp
@@ -0,0 +1,83 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestTeam.hpp>
+
+namespace Test {
+// Every test here except shmem_size instantiates its helper twice: once with a Static and once with a Dynamic schedule.
+TEST_F( TEST_CATEGORY, team_shared_request )
+{
+  TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >();
+  TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+
+TEST_F( TEST_CATEGORY, team_scratch_request )
+{
+  TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >();
+  TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+// NOTE(review): ROCM_VERSION is not defined in this file; an undefined macro evaluates to 0 in #if, so if nothing else defines it the lambda test is compiled out whenever KOKKOS_ENABLE_ROCM is defined - confirm this is intended.
+#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+#if !defined(KOKKOS_ENABLE_ROCM) || ( 8000 <= ROCM_VERSION )
+TEST_F( TEST_CATEGORY, team_lambda_shared_request )
+{
+  TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >();
+  TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+#endif // !defined(KOKKOS_ENABLE_ROCM) || ( 8000 <= ROCM_VERSION )
+#endif // defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+
+TEST_F( TEST_CATEGORY, shmem_size )
+{
+  TestShmemSize< TEST_EXECSPACE >();
+}
+
+TEST_F( TEST_CATEGORY, multi_level_scratch )
+{
+  TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >();
+  TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewAPI_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3e6f559438a410a900fcea2808330ffdc227073f
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewAPI_b.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestViewAPI.hpp>
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_ViewMapping_a.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewMapping_a.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a7b2b9695de8f910d089f50de86928a60d09225f
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewMapping_a.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestViewMapping_a.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_ViewMapping_b.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewMapping_b.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa5b209f1b39530e518ee3d1803ef1b70aa9504e
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewMapping_b.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestViewMapping_b.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_ViewMapping_subview.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewMapping_subview.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0af114c7ea2b0691127a92632036941e6fdea82b
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewMapping_subview.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestViewMapping_subview.hpp>
+
diff --git a/lib/kokkos/core/unit_test/rocm/TestROCm_ViewOfClass.cpp b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewOfClass.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f0b95c0e004cb5dc6f4c0ffa668cd1dda8e5719d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/rocm/TestROCm_ViewOfClass.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCm_Category.hpp>
+#include <TestViewOfClass.hpp>
+
diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Crs.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Crs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5799ab816c2ce2c9f97c78f208809717f93d8404
--- /dev/null
+++ b/lib/kokkos/core/unit_test/serial/TestSerial_Crs.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <serial/TestSerial_Category.hpp>
+#include <TestCrs.hpp>
diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Crs.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Crs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..25243273fe82b73c41bef5702e45db6e7ada6860
--- /dev/null
+++ b/lib/kokkos/core/unit_test/threads/TestThreads_Crs.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<threads/TestThreads_Category.hpp>
+#include<TestCrs.hpp>
diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash
index 6d636dc7e4fc0e8225556bca214b0984f2c43bca..b4a69d30fd97d3fd566d31d0ad12dc0053713633 100755
--- a/lib/kokkos/generate_makefile.bash
+++ b/lib/kokkos/generate_makefile.bash
@@ -31,6 +31,9 @@ do
       KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda"
       CUDA_PATH="${key#*=}"
       ;;
+    --with-rocm)  # exact match only: this option takes no =value
+      KOKKOS_DEVICES="${KOKKOS_DEVICES},ROCm"  # append ROCm to the comma-joined device list
+      ;;
     --with-openmp)
       KOKKOS_DEVICES="${KOKKOS_DEVICES},OpenMP"
       ;;
@@ -56,6 +59,9 @@ do
     --with-hwloc*)
       HWLOC_PATH="${key#*=}"
       ;;
+    --with-memkind*)  # matches any option starting with --with-memkind
+      MEMKIND_PATH="${key#*=}"  # drop everything up to and including the first '='; key is kept whole if it has no '='
+      ;;
     --arch*)
       KOKKOS_ARCH="${key#*=}"
       ;;
@@ -117,6 +123,7 @@ do
       echo "                 ARMv81         = ARMv8.1 Compatible CPU"
       echo "                 ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU"
       echo "               [IBM]"
+      echo "                 Power7         = IBM POWER7 and POWER7+ CPUs"
       echo "                 Power8         = IBM POWER8 CPUs"
       echo "                 Power9         = IBM POWER9 CPUs"
       echo "               [Intel]"
@@ -151,7 +158,8 @@ do
       echo "                                -lpthread, etc.)."
       echo "--with-gtest=/Path/To/Gtest:  Set path to gtest.  (Used in unit and performance"
       echo "                                tests.)"
-      echo "--with-hwloc=/Path/To/Hwloc:  Set path to hwloc."
+      echo "--with-hwloc=/Path/To/Hwloc:  Set path to hwloc library."
+      echo "--with-memkind=/Path/To/MemKind:  Set path to memkind library."
       echo "--with-options=[OPT]:         Additional options to Kokkos:"
       echo "                                compiler_warnings"
       echo "                                aggressive_vectorization = add ivdep on loops"
@@ -228,7 +236,17 @@ else
 fi
 
 if [ ${#HWLOC_PATH} -gt 0 ]; then
-  KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc"
+  KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HWLOC_PATH=${HWLOC_PATH}"
+  KOKKOS_USE_TPLS="${KOKKOS_USE_TPLS},hwloc"  # collect the TPL name; KOKKOS_USE_TPLS is written to KOKKOS_SETTINGS once, further down
+fi
+# memkind is recorded under the TPL name "experimental_memkind".
+if [ ${#MEMKIND_PATH} -gt 0 ]; then
+  KOKKOS_SETTINGS="${KOKKOS_SETTINGS} MEMKIND_PATH=${MEMKIND_PATH}" # only the path goes into KOKKOS_SETTINGS here
+  KOKKOS_USE_TPLS="${KOKKOS_USE_TPLS},experimental_memkind"
+fi
+# NOTE(review): each entry is appended as ",name", so the value starts with ',' when KOKKOS_USE_TPLS was empty beforehand - confirm the consumer tolerates that.
+if [ ${#KOKKOS_USE_TPLS} -gt 0 ]; then
+  KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_USE_TPLS=${KOKKOS_USE_TPLS}"  # single KOKKOS_USE_TPLS= entry covering every TPL collected above
 fi
 
 if [ ${#QTHREADS_PATH} -gt 0 ]; then