From 9c2200338acc77d43cf41e01376947f3137823a8 Mon Sep 17 00:00:00 2001
From: Marcin Kirsz <mkirsz@ed.ac.uk>
Date: Thu, 26 Sep 2024 16:08:54 +0100
Subject: [PATCH] Remove MPI from HPO

---
 bin/tadah_cli.cpp | 298 +++++++++++++++++++++++-----------------------
 1 file changed, 149 insertions(+), 149 deletions(-)
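
Notes:

This change disables MPI in the HPO subcommand: the --dtargets/--out_dir
path, which distributed target files across MPI ranks, is commented out,
and hpo_run() is now invoked directly on the single --target file.

For reference while that path is disabled, the round-robin split described
in the removed comments (19 files over 4 ranks gives counts 5 5 5 4 and
first indices 0, 5, 10, 15) can be reproduced with the standalone sketch
below; nranks and nfiles are placeholders standing in for ncpu and
trg.size(), so this is illustrative only, not part of the patch.

    // split_sketch.cpp -- compile with any C++ compiler, no MPI required.
    #include <iostream>
    #include <vector>

    int main() {
      const int nranks = 4;   // stand-in for ncpu
      const int nfiles = 19;  // stand-in for trg.size()

      // Round-robin counts: 19 files over 4 ranks -> 5 5 5 4
      std::vector<int> counts(nranks, 0);
      for (int i = 0; i < nfiles; ++i) counts[i % nranks]++;

      // First index handed to each rank -> 0 5 10 15
      std::vector<int> first_idx(nranks);
      int sum = 0;
      for (int r = 0; r < nranks; ++r) {
        first_idx[r] = sum;
        sum += counts[r];
      }

      for (int r = 0; r < nranks; ++r)
        std::cout << "rank " << r << ": count=" << counts[r]
                  << " first=" << first_idx[r] << "\n";
    }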

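One note for whoever re-enables the --dtargets path: on the worker side the
disabled code receives the work-package size with MPI_DOUBLE, although s is
an int and rank 0 sends it as MPI_INT. A minimal sketch of the same
size-then-MPI_Scatterv handshake with matching types, assuming the same
round-robin split as above (ntargets is a placeholder, error handling
omitted; build with an MPI wrapper such as mpicxx and run under mpirun):

    // scatter_sketch.cpp -- illustrative only, not part of this patch.
    #include <mpi.h>
    #include <numeric>
    #include <vector>

    int main(int argc, char** argv) {
      MPI_Init(&argc, &argv);
      int rank = 0, ncpu = 1;
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
      MPI_Comm_size(MPI_COMM_WORLD, &ncpu);

      const int ntargets = 19;  // placeholder for trg.size()
      std::vector<int> trg_idx, counts(ncpu, 0), first_idx(ncpu, 0);
      int s = 0;  // number of target indices this rank will handle

      if (rank == 0) {
        trg_idx.resize(ntargets);
        std::iota(trg_idx.begin(), trg_idx.end(), 0);
        for (int i = 0; i < ntargets; ++i) counts[i % ncpu]++;
        for (int r = 1; r < ncpu; ++r)
          first_idx[r] = first_idx[r - 1] + counts[r - 1];
        // Tell each worker how many indices to expect; sent and received as MPI_INT.
        for (int p = 1; p < ncpu; ++p)
          MPI_Send(&counts[p], 1, MPI_INT, p, 99, MPI_COMM_WORLD);
        s = counts[0];
      } else {
        // Matching type here (the disabled code used MPI_DOUBLE for this int).
        MPI_Recv(&s, 1, MPI_INT, 0, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
      }

      // Root scatters the index ranges; workers only supply their receive buffer.
      std::vector<int> local_trg_indices(s);
      MPI_Scatterv(rank == 0 ? trg_idx.data() : nullptr, counts.data(),
                   first_idx.data(), MPI_INT, local_trg_indices.data(), s,
                   MPI_INT, 0, MPI_COMM_WORLD);

      MPI_Finalize();
    }
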
diff --git a/bin/tadah_cli.cpp b/bin/tadah_cli.cpp
index c0864fc..02e8fed 100644
--- a/bin/tadah_cli.cpp
+++ b/bin/tadah_cli.cpp
@@ -43,9 +43,9 @@ extern "C" void pdgemv_(char* transa, int* m, int* n, double* alpha, double* a,
 void TadahCLI::subcommand_train() {
 
   int rank = 0;
-  int ncpu = 1;
 
 #ifdef TADAH_BUILD_MPI
+  int ncpu = 1;
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &ncpu);
   if (ncpu<2) {
@@ -231,7 +231,7 @@ void TadahCLI::subcommand_train() {
     // HOST: prepare work packages
     // filename, first structure index, number of structures to read
     std::vector<std::tuple<std::string,int,int>> wpckgs;
-    int nstruc = config.get<int>("MPIWPCKG");  // TODO: read from Config, the number of structures in a single work package
+    int nstruc = config.get<int>("MPIWPCKG");
     for (const std::string &fn : config("DBFILE")) {
       // get number of structures
       int dbsize = StructureDB::count(fn).first;
@@ -702,7 +702,7 @@ void TadahCLI::subcommand_train() {
     std::ofstream outfile;
     outfile.open ("pot.tadah");
     outfile << param_file << std::endl;;
-  //
+  // TODO: What about uncertainty when MPI is enabled?
   //  //if(train->count("--uncertainty")) {
   //  //  t_type weights = model->get_weights();
   //  //  t_type unc = model->get_weights_uncertainty();
@@ -833,15 +833,15 @@ void TadahCLI::subcommand_hpo(
 #ifdef TADAH_ENABLE_HPO
   CLI::Timer timer_tot {"HPO", CLI::Timer::Big};
   // the number of processes in MPI_COMM_WORLD
-  int ncpu=1;
+  // int ncpu=1;
   // the rank of this process in MPI_COMM_WORLD
-  int rank=0;
+  // int rank=0;
 
-#ifdef TADAH_BUILD_MPI
-  MPI_Comm_size(MPI_COMM_WORLD, &ncpu);
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-  MPI_Status status;  
-#endif
+// #ifdef TADAH_BUILD_MPI
+//   MPI_Comm_size(MPI_COMM_WORLD, &ncpu);
+//   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+//   MPI_Status status;  
+// #endif
 
   if(hpo->count("--verbose"))
     set_verbose();
@@ -860,123 +860,123 @@ void TadahCLI::subcommand_hpo(
     config.add("STRESS", "true");
   }
 
-  if (hpo->count("--dtargets")) {
-    // list of indices to the trg vec to process by this proc
-    std::vector<int>local_trg_indices;
-
-    // Everyone read files from the provided directory
-    std::vector<fs::path> trg=read_targets(targets_dir);
-
-    // the number of files the process will work on
-    // also the size of an array it will get from the host process.
-    int s;
-    if ( rank == 0 ) {
-      // host proc distributes work equally between available processes
-      // Each process will receive an array of integers.
-      // Integers correspond to indices in the trg vector
-      // e.g. indices 3,4 indicate that the process
-      // should work on target files trg[3] and trg[4]
-
-      // prep indices array and fill from 0 to trg.size()-1
-      std::vector<int> trg_idx(trg.size());
-      std::iota (std::begin(trg_idx), std::end(trg_idx), 0);
-
-      // Establish the number of target files per process.
-      // The work should be evenly distributed.
-      // e.g.
-      // For 4 processes and 19 files
-      // 5 5 5 4
-      std::vector<int> counts(ncpu,0);
-      for (size_t i=0;i<trg.size();++i) {
-        counts[i%ncpu]++;
-      }
-
-      // Keep first index to sent to each proc
-      // For example above 0,5,10,15
-      std::vector<int> first_idx(ncpu);
-      int sum=0;
-      for (int i=0;i<ncpu; ++i) {
-        first_idx[i]=sum;
-        sum+=counts[i];
-      }
-
-      // First send expected size of a chunk
-      for (int p = 1; p < ncpu; p++ ){
-        s=counts[p];
-#ifdef TADAH_BUILD_MPI
-        MPI_Send ( &s, 1, MPI_INT, p, 99, MPI_COMM_WORLD );
-#endif
-      }
-
-      // and prepare host process for its own work
-      s=counts[rank];
-      local_trg_indices.resize(s);
-#ifdef TADAH_BUILD_MPI
-      MPI_Scatterv(trg_idx.data(), counts.data(), first_idx.data(),
-          MPI_INT, local_trg_indices.data(), s, MPI_INT, 0, MPI_COMM_WORLD);
-#endif
-    }
-    else  {
-#ifdef TADAH_BUILD_MPI
-      // Get the size of work to be done
-      MPI_Recv ( &s, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD, &status );
-#endif
-      // We know the amount of work, so can resize array
-      local_trg_indices.resize(s);
-
-
-#ifdef TADAH_BUILD_MPI
-      // Finally get indices to the trg array
-      MPI_Scatterv(NULL, NULL, NULL, MPI_INT, local_trg_indices.data(),
-          s, MPI_INT, 0, MPI_COMM_WORLD);
-#endif
-    }
-
-    // Finally, we can start working on each target file assigned.
-    // But first we have to create sensible directory structure
-    // where we can dump all the output. 
-    // The user provides the outdir which we assume that is empty 
-    // and create subdirectories which corresponds to the names
-    // of the target files. All paths should be absolute.
-    // Note that we do not parallelise here over threads
-    // because parallelisation is done elsewhere.
-
-    // Keep program current working directory
-    fs::path cwd = fs::absolute(fs::current_path());
-
-    // Prepare outdir path
-    fs::path outdir = cwd.append(targets_out_dir);
-
-    for (const int idx: local_trg_indices) {
-      // Build an absolute path for target computations
-      // inside the user specified outdir.
-      // Use the name of the target file without extenstion
-      fs::path outdir_target = outdir;
-      outdir_target /= trg[idx].filename().replace_extension("");
-
-      // Create output dir for trg[idx] target
-      std::filesystem::create_directory(outdir_target);
-
-      // Copy target file to its output directory
-      // so the user has a copy for future reference
-      fs::copy(fs::absolute(trg[idx]),outdir_target);
-
-      // Change to target working directory
-      std::filesystem::current_path(outdir_target);
-
-      // Get the name for a target file
-      std::string target_file = trg[idx].filename();
-
-      // Run computation
-      hpo_run(config, target_file);
-
-      // Just in case return to where we started
-      std::filesystem::current_path(cwd);
-    }
-  }
-  else if(hpo->count("--target")) {
+  //if (hpo->count("--dtargets")) {
+  //  // list of indices to the trg vec to process by this proc
+  //  std::vector<int>local_trg_indices;
+
+  //  // Everyone read files from the provided directory
+  //  std::vector<fs::path> trg=read_targets(targets_dir);
+
+  //  // the number of files the process will work on
+  //  // also the size of an array it will get from the host process.
+  //  int s;
+  //  if ( rank == 0 ) {
+  //    // host proc distributes work equally between available processes
+  //    // Each process will receive an array of integers.
+  //    // Integers correspond to indices in the trg vector
+  //    // e.g. indices 3,4 indicate that the process
+  //    // should work on target files trg[3] and trg[4]
+
+  //    // prep indices array and fill from 0 to trg.size()-1
+  //    std::vector<int> trg_idx(trg.size());
+  //    std::iota (std::begin(trg_idx), std::end(trg_idx), 0);
+
+  //    // Establish the number of target files per process.
+  //    // The work should be evenly distributed.
+  //    // e.g.
+  //    // For 4 processes and 19 files
+  //    // 5 5 5 4
+  //    std::vector<int> counts(ncpu,0);
+  //    for (size_t i=0;i<trg.size();++i) {
+  //      counts[i%ncpu]++;
+  //    }
+
+  //    // Keep the first index to send to each proc
+  //    // For the example above: 0,5,10,15
+  //    std::vector<int> first_idx(ncpu);
+  //    int sum=0;
+  //    for (int i=0;i<ncpu; ++i) {
+  //      first_idx[i]=sum;
+  //      sum+=counts[i];
+  //    }
+
+  //    // First send expected size of a chunk
+  //    for (int p = 1; p < ncpu; p++ ){
+  //      s=counts[p];
+//#ifdef TADAH_BUILD_MPI
+  //      MPI_Send ( &s, 1, MPI_INT, p, 99, MPI_COMM_WORLD );
+//#endif
+  //    }
+
+  //    // and prepare host process for its own work
+  //    s=counts[rank];
+  //    local_trg_indices.resize(s);
+//#ifdef TADAH_BUILD_MPI
+  //    MPI_Scatterv(trg_idx.data(), counts.data(), first_idx.data(),
+  //        MPI_INT, local_trg_indices.data(), s, MPI_INT, 0, MPI_COMM_WORLD);
+//#endif
+  //  }
+  //  else  {
+//#ifdef TADAH_BUILD_MPI
+  //    // Get the size of work to be done
+  //    MPI_Recv ( &s, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD, &status );
+//#endif
+  //    // We know the amount of work, so can resize array
+  //    local_trg_indices.resize(s);
+
+
+//#ifdef TADAH_BUILD_MPI
+  //    // Finally get indices to the trg array
+  //    MPI_Scatterv(NULL, NULL, NULL, MPI_INT, local_trg_indices.data(),
+  //        s, MPI_INT, 0, MPI_COMM_WORLD);
+//#endif
+  //  }
+
+  //  // Finally, we can start working on each target file assigned.
+  //  // But first we have to create a sensible directory structure
+  //  // where we can dump all the output.
+  //  // The user provides the outdir, which we assume is empty,
+  //  // and we create subdirectories that correspond to the names
+  //  // of the target files. All paths should be absolute.
+  //  // Note that we do not parallelise here over threads
+  //  // because parallelisation is done elsewhere.
+
+  //  // Keep program current working directory
+  //  fs::path cwd = fs::absolute(fs::current_path());
+
+  //  // Prepare outdir path
+  //  fs::path outdir = cwd.append(targets_out_dir);
+
+  //  for (const int idx: local_trg_indices) {
+  //    // Build an absolute path for target computations
+  //    // inside the user specified outdir.
+  //    // Use the name of the target file without extension
+  //    fs::path outdir_target = outdir;
+  //    outdir_target /= trg[idx].filename().replace_extension("");
+
+  //    // Create output dir for trg[idx] target
+  //    std::filesystem::create_directory(outdir_target);
+
+  //    // Copy target file to its output directory
+  //    // so the user has a copy for future reference
+  //    fs::copy(fs::absolute(trg[idx]),outdir_target);
+
+  //    // Change to target working directory
+  //    std::filesystem::current_path(outdir_target);
+
+  //    // Get the name for a target file
+  //    std::string target_file = trg[idx].filename();
+
+  //    // Run computation
+  //    hpo_run(config, target_file);
+
+  //    // Just in case return to where we started
+  //    std::filesystem::current_path(cwd);
+  //  }
+  //}
+  //else if(hpo->count("--target")) {
     hpo_run(config, target_file);
-  }
+  //}
 
   if (is_verbose()) std::cout << timer_tot.to_string() << std::endl;
 #else
@@ -1132,29 +1132,29 @@ TadahCLI::TadahCLI():
     ->option_text("TARGET_FILE")
     ->check(CLI::ExistingFile);
 
-  ss.str(std::string());
-  ss << "Output directory for -d option.\n";
-  auto o_opt=hpo->add_option("-o,--out_dir", targets_out_dir, ss.str())
-    ->option_text("OUTPUT_DIRECTORY")
-    ->check(CLI::ExistingDirectory)
-    ->excludes(t_opt)
-    ->needs(c_opt);
-
-  ss.str(std::string());
-  ss << "A direcory containing model's target files.\n";
-  ss << "Each target file must contain a set of\n";
-  ss << "hyperparameter constraints as in the -t option.\n";
-  ss << "This option can be run with MPI.\n";
-  ss << "Each MPI process will run independent\n";
-  ss << "optimisation for every target in the directory\n";
-  ss << "resulting in N models for N targets.\n";
-  ss << "See documentation for more details.\n";
-  hpo->add_option("-d,--dtargets", targets_dir, ss.str())
-    ->option_text("TARGETS_DIRECTORY")
-    ->check(CLI::ExistingDirectory)
-    ->excludes(t_opt)
-    ->needs(c_opt)
-    ->needs(o_opt);
+  // ss.str(std::string());
+  // ss << "Output directory for -d option.\n";
+  // auto o_opt=hpo->add_option("-o,--out_dir", targets_out_dir, ss.str())
+  //   ->option_text("OUTPUT_DIRECTORY")
+  //   ->check(CLI::ExistingDirectory)
+  //   ->excludes(t_opt)
+  //   ->needs(c_opt);
+
+  //ss.str(std::string());
+  //ss << "A direcory containing model's target files.\n";
+  //ss << "Each target file must contain a set of\n";
+  //ss << "hyperparameter constraints as in the -t option.\n";
+  //ss << "This option can be run with MPI.\n";
+  //ss << "Each MPI process will run independent\n";
+  //ss << "optimisation for every target in the directory\n";
+  //ss << "resulting in N models for N targets.\n";
+  //ss << "See documentation for more details.\n";
+  //hpo->add_option("-d,--dtargets", targets_dir, ss.str())
+  //  ->option_text("TARGETS_DIRECTORY")
+  //  ->check(CLI::ExistingDirectory)
+  //  ->excludes(t_opt)
+  //  ->needs(c_opt)
+  //  ->needs(o_opt);
 
 
   hpo->add_flag("-F,--Force", "Train with forces.");
-- 
GitLab