From 341fa160fe8c7c64baa9b1fac902ec57714f4049 Mon Sep 17 00:00:00 2001
From: Trung Nguyen <ndactrung@gmail.com>
Date: Wed, 23 May 2018 16:11:55 -0500
Subject: [PATCH] Updated UCL_Device built with OpenCL to use platforms that
 support accelerators by default

---
 lib/gpu/Opencl.makefile     |   2 +-
 lib/gpu/geryon/ocl_device.h | 168 ++++++++++++++++++++++--------------
 2 files changed, 102 insertions(+), 68 deletions(-)

diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile
index a5fcde68f5..3e73e6256c 100644
--- a/lib/gpu/Opencl.makefile
+++ b/lib/gpu/Opencl.makefile
@@ -609,7 +609,7 @@ $(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_
 $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
 	$(OCL) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)
 
-$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
+$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(OCL_H)
 	$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK) 
 
 $(OCL_LIB): $(OBJS) $(PTXS)
diff --git a/lib/gpu/geryon/ocl_device.h b/lib/gpu/geryon/ocl_device.h
index 584d04e616..2b2367545e 100644
--- a/lib/gpu/geryon/ocl_device.h
+++ b/lib/gpu/geryon/ocl_device.h
@@ -280,6 +280,9 @@ class UCL_Device {
   /// Return the OpenCL type for the device
   inline cl_device_id & cl_device() { return _cl_device; }
 
+  /// Select platform pid; if pid<0, the first platform with a CPU, GPU or accelerator device
+  inline void set_platform_accelerator(int pid=-1);
+
  private:
   int _num_platforms;          // Number of platforms
   int _platform;               // UCL_Device ID for current platform
@@ -311,8 +314,8 @@ UCL_Device::UCL_Device() {
     return;
   } else
     _num_platforms=static_cast<int>(nplatforms);
-
-  set_platform(0);
+  // note that platform 0 is not necessarily associated with accelerators
+  set_platform_accelerator();
 }
 
 UCL_Device::~UCL_Device() {
@@ -320,6 +323,7 @@ UCL_Device::~UCL_Device() {
 }
 
 void UCL_Device::clear() {
+  _properties.clear();
   if (_device>-1) {
     for (size_t i=0; i<_cq.size(); i++) {
       CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq.back()));
@@ -529,75 +533,105 @@ int UCL_Device::set(int num) {
   return create_context();
 }
 
-// List all devices along with all properties
+// List all devices from all platforms with all properties; calls set_platform() on each platform in turn
 void UCL_Device::print_all(std::ostream &out) {
-  if (num_devices() == 0)
-    out << "There is no device supporting OpenCL\n";
-  for (int i=0; i<num_devices(); ++i) {
-    out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n";
-    out << "  Type of device:                                "
-        << device_type_name(i).c_str() << std::endl;
-    out << "  Double precision support:                      ";
-    if (double_precision(i))
-      out << "Yes\n";
-    else
-      out << "No\n";
-    out << "  Total amount of global memory:                 "
-        << gigabytes(i) << " GB\n";
-    out << "  Number of compute units/multiprocessors:       "
-        << _properties[i].compute_units << std::endl;
-    //out << "  Number of cores:                               "
-    //    << cores(i) << std::endl;
-    out << "  Total amount of constant memory:               "
-        << _properties[i].const_mem << " bytes\n";
-    out << "  Total amount of local/shared memory per block: "
-        << _properties[i].shared_mem << " bytes\n";
-    //out << "  Total number of registers available per block: "
-    //    << _properties[i].regsPerBlock << std::endl;
-    //out << "  Warp size:                                     "
-    //    << _properties[i].warpSize << std::endl;
-    out << "  Maximum group size (# of threads per block)    "
-        << _properties[i].work_group_size << std::endl;
-    out << "  Maximum item sizes (# threads for each dim)    "
-        << _properties[i].work_item_size[0] << " x "
-        << _properties[i].work_item_size[1] << " x "
-        << _properties[i].work_item_size[2] << std::endl;
-    //out << "  Maximum sizes of each dimension of a grid:     "
-    //    << _properties[i].maxGridSize[0] << " x "
-    //    << _properties[i].maxGridSize[1] << " x "
-    //    << _properties[i].maxGridSize[2] << std::endl;
-    //out << "  Maximum memory pitch:                          "
-    //    << _properties[i].memPitch) << " bytes\n";
-    //out << "  Texture alignment:                             "
-    //    << _properties[i].textureAlignment << " bytes\n";
-    out << "  Clock rate:                                    "
-        << clock_rate(i) << " GHz\n";
-    //out << "  Concurrent copy and execution:                 ";
-    out << "  ECC support:                                   ";
-    if (_properties[i].ecc_support)
-      out << "Yes\n";
-    else
-      out << "No\n";
-    out << "  Device fission into equal partitions:          ";
-    if (fission_equal(i))
-      out << "Yes\n";
-    else
-      out << "No\n";
-    out << "  Device fission by counts:                      ";
-    if (fission_by_counts(i))
-      out << "Yes\n";
-    else
-      out << "No\n";
-    out << "  Device fission by affinity:                    ";
-    if (fission_by_affinity(i))
-      out << "Yes\n";
-    else
-      out << "No\n";
-    out << "  Maximum subdevices from fission:               "
-        << max_sub_devices(i) << std::endl;
+  // --- loop through the platforms
+  for (int n=0; n<_num_platforms; n++) {
+
+    set_platform(n);
+
+    out << "\nPlatform " << n << ":\n";
+
+    if (num_devices() == 0)
+      out << "There is no device supporting OpenCL\n";
+    for (int i=0; i<num_devices(); ++i) {
+      out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n";
+      out << "  Type of device:                                "
+          << device_type_name(i).c_str() << std::endl;
+      out << "  Double precision support:                      ";
+      if (double_precision(i))
+        out << "Yes\n";
+      else
+        out << "No\n";
+      out << "  Total amount of global memory:                 "
+          << gigabytes(i) << " GB\n";
+      out << "  Number of compute units/multiprocessors:       "
+          << _properties[i].compute_units << std::endl;
+      //out << "  Number of cores:                               "
+      //    << cores(i) << std::endl;
+      out << "  Total amount of constant memory:               "
+          << _properties[i].const_mem << " bytes\n";
+      out << "  Total amount of local/shared memory per block: "
+          << _properties[i].shared_mem << " bytes\n";
+      //out << "  Total number of registers available per block: "
+      //    << _properties[i].regsPerBlock << std::endl;
+      //out << "  Warp size:                                     "
+      //    << _properties[i].warpSize << std::endl;
+      out << "  Maximum group size (# of threads per block)    "
+          << _properties[i].work_group_size << std::endl;
+      out << "  Maximum item sizes (# threads for each dim)    "
+          << _properties[i].work_item_size[0] << " x "
+          << _properties[i].work_item_size[1] << " x "
+          << _properties[i].work_item_size[2] << std::endl;
+      //out << "  Maximum sizes of each dimension of a grid:     "
+      //    << _properties[i].maxGridSize[0] << " x "
+      //    << _properties[i].maxGridSize[1] << " x "
+      //    << _properties[i].maxGridSize[2] << std::endl;
+      //out << "  Maximum memory pitch:                          "
+      //    << _properties[i].memPitch) << " bytes\n";
+      //out << "  Texture alignment:                             "
+      //    << _properties[i].textureAlignment << " bytes\n";
+      out << "  Clock rate:                                    "
+          << clock_rate(i) << " GHz\n";
+      //out << "  Concurrent copy and execution:                 ";
+      out << "  ECC support:                                   ";
+      if (_properties[i].ecc_support)
+        out << "Yes\n";
+      else
+        out << "No\n";
+      out << "  Device fission into equal partitions:          ";
+      if (fission_equal(i))
+        out << "Yes\n";
+      else
+        out << "No\n";
+      out << "  Device fission by counts:                      ";
+      if (fission_by_counts(i))
+        out << "Yes\n";
+      else
+        out << "No\n";
+      out << "  Device fission by affinity:                    ";
+      if (fission_by_affinity(i))
+        out << "Yes\n";
+      else
+        out << "No\n";
+      out << "  Maximum subdevices from fission:               "
+          << max_sub_devices(i) << std::endl;
+    }
   }
 }
 
+// Select the platform to use. If pid >= 0, set_platform(pid) is called directly.
+// If pid < 0, platforms are probed in order until one has a CPU, GPU or accelerator device.
+void UCL_Device::set_platform_accelerator(int pid) {
+  if (pid < 0) {
+    int found = 0;  // set to 1 once a platform with a matching device type is seen
+    for (int n=0; n<_num_platforms; n++) {
+      set_platform(n);  // presumably refreshes num_devices()/_properties for platform n - confirm
+      for (int i=0; i<num_devices(); i++) {
+        if (_properties[i].device_type==CL_DEVICE_TYPE_CPU ||
+            _properties[i].device_type==CL_DEVICE_TYPE_GPU ||
+            _properties[i].device_type==CL_DEVICE_TYPE_ACCELERATOR) {
+          found = 1;
+          break;
+        }
+      }
+      if (found) break;  // stop probing; if nothing ever matches, the loop ends on the last platform
+    }
+  } else {
+    set_platform(pid);
+  }
 }
 
+} // namespace ucl_opencl 
+
 #endif
-- 
GitLab