From e8ac237a9282fe2f6bce8d89fb3e4e50e9eae217 Mon Sep 17 00:00:00 2001
From: Marcin Kirsz <mkirsz@ed.ac.uk>
Date: Tue, 16 Apr 2024 14:10:46 +0100
Subject: [PATCH] Link scalapack. Implemented routines to compute sizes of
 local matrices...

---
 CMakeLists.txt    |   4 +-
 bin/tadah_cli.cpp | 982 +++++++++++++++++++++++++---------------------
 2 files changed, 534 insertions(+), 452 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 33f5518..1aba84a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -236,11 +236,13 @@ if(TADAH_BUILD_MPI)
     target_link_libraries(tadah PUBLIC MPI::MPI_CXX)
     add_compile_definitions(TADAH_BUILD_MPI)
   endif()
+  find_package(SCALAPACK REQUIRED)
+  target_link_libraries(tadah PUBLIC scalapack)
 endif()
 message(STATUS "${TADAH}: Build MPI version of ${TADAH} is ${TADAH_BUILD_MPI}")
 #########################################################################
 
-#########################################################################
+#########################################################################     
 message(STATUS "${TADAH}: Build with Hyperparameter optimiser is ${TADAH_ENABLE_HPO}")
 #########################################################################
 
diff --git a/bin/tadah_cli.cpp b/bin/tadah_cli.cpp
index 1fe6ee6..7a4edbb 100644
--- a/bin/tadah_cli.cpp
+++ b/bin/tadah_cli.cpp
@@ -11,6 +11,7 @@
 #include "../MLIP/nn_finder.h"
 #include "../MLIP/analytics/analytics.h"
 #include "../MLIP/version.h"
+#include "../MLIP/design_matrix/design_matrix.h"
 #ifdef TADAH_ENABLE_HPO
 #include "../HPO/hpo.h"
 #endif
@@ -18,398 +19,477 @@
 #include <iostream>
 #include <stdexcept>
 
+#ifdef TADAH_BUILD_MPI
+extern "C" void blacs_get_(int*, int*, int*);
+extern "C" void blacs_pinfo_(int*, int*);
+extern "C" void blacs_gridinit_(int*, char*, int*, int*);
+extern "C" void blacs_gridinfo_(int*, int*, int*, int*, int*);
+extern "C" void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*);
+extern "C" void pdpotrf_(char*, int*, double*, int*, int*, int*, int*);
+extern "C" void blacs_gridexit_(int*);
+extern "C" int numroc_(int*, int*, int*, int*, int*);
+#endif
+
 void TadahCLI::subcommand_train() {
-  
+
+  int rank = 0;
+  int ncpu = 1;
+
+
+#ifdef TADAH_BUILD_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &ncpu);
+#endif
+
   /* MPI CODE:
    * 1. each rank reads config file
-   * 2. rank 0 should calculate total number of structures
-   * 3. rank 0 should be able to calculate the size of the PHI matrix
-   *    based on force and stress flags
-   *    Who should be able to do this? We do not want to load all data
-   * 4. rank 0 should send the local matrix sizes to worker processes
+   * 2. root calculate total number of structures
+   * 3. root calculate the dimensions of the PHI matrix
+   *    based on force and stress flags and type of the regression
+   * 4. root broadcast PHI cols number to all cpus
    */
 
 
-    CLI::Timer timer_tot {"Training", CLI::Timer::Big};
-    if(train->count("--verbose"))
-        set_verbose();
-    Config config(config_file);
-    config.check_for_training();
+  CLI::Timer timer_tot {"Training", CLI::Timer::Big};
 
-    if(train->count("--Force")) {
-        config.remove("FORCE");
-        config.add("FORCE", "true");
-    }
-    if(train->count("--Stress")) {
-        config.remove("STRESS");
-        config.add("STRESS", "true");
-    }
+  if(train->count("--verbose"))
+    set_verbose();
+
+  Config config(config_file);
+  config.check_for_training();
+
+  if(train->count("--Force")) {
+    config.remove("FORCE");
+    config.add("FORCE", "true");
+  }
+  if(train->count("--Stress")) {
+    config.remove("STRESS");
+    config.add("STRESS", "true");
+  }
 
+
+  if (rank==0)
     if (is_verbose()) std::cout << "Training mode" << std::endl;
-    DC_Selector DCS(config);
 
-    // must set DSIZE key as early as possible,
-    // this will allow us to querry for the number of columns later on
-    DescriptorsCalc<> dc(config,*DCS.d2b,*DCS.d3b,*DCS.dmb,
-            *DCS.c2b,*DCS.c3b,*DCS.cmb);
+  DC_Selector DCS(config);
 
-    DM_Function_Base *fb = CONFIG::factory<DM_Function_Base,Config&>(
-            config.get<std::string>("MODEL",1),config);
+  // must set DSIZE key as early as possible,
+  // this will allow us to querry for the number of columns later on
+  DescriptorsCalc<> dc(config,*DCS.d2b,*DCS.d3b,*DCS.dmb,
+      *DCS.c2b,*DCS.c3b,*DCS.cmb);
 
+  DM_Function_Base *fb = CONFIG::factory<DM_Function_Base,Config&>(
+      config.get<std::string>("MODEL",1),config);
 
-    M_Tadah_Base *model = CONFIG::factory<M_Tadah_Base,DM_Function_Base&,Config&>
-        (config.get<std::string>("MODEL",0),*fb,config);
+  M_Tadah_Base *model = CONFIG::factory<M_Tadah_Base,DM_Function_Base&,Config&>
+    (config.get<std::string>("MODEL",0),*fb,config);
 
-    std::cout << "PHI COLS: " << fb->get_phi_cols(config) << std::endl;
-    //std::cout << "PHI ROWS: " << ??? << std::endl;
+  int PHI_cols;
+  int PHI_rows;
 
+  if (rank==0) {
+    int nstruct_tot = StructureDB::count(config).first;
+    int natoms_tot = StructureDB::count(config).second;
+    PHI_cols = fb->get_phi_cols(config);
+    PHI_rows = DesignMatrixBase::phi_rows_num(config, nstruct_tot, natoms_tot);
+  }
+#ifdef TADAH_BUILD_MPI
+  MPI_Bcast(&PHI_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&PHI_cols, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  std::cout << "PHI ROWS: " << PHI_rows << std::endl;
+  std::cout << "PHI COLS: " << PHI_cols << std::endl;
+#endif
 
-    if (is_verbose()) std::cout << "Loading structures..." << std::flush;
-    StructureDB stdb(config);
-    if (is_verbose()) std::cout << "Done!" << std::endl;
+  // Initialize BLACS
+  int b_rank;
+  int b_row;
+  int b_col;
+  int context;
+  int b_nrows;   // Number of row procs
+  int b_ncols;   // Number of column procs
+  int rnb = 22;   // Row block size
+  int cnb = 22;   // Column block size
+#ifdef TADAH_BUILD_MPI
+  blacs_pinfo_(&b_rank, &ncpu) ; // BLACS rank and world size
+  std::cout << "RANK: " << rank << " BRANK: " << b_rank << " ncpu: " << ncpu << std::endl;
+
+  // for now assume that the CPU grid is tall and narrow
+  b_ncols = 1;
+  b_nrows = ncpu;
+  assert(b_nrows * b_ncols == ncpu);
+
+  std::cout << "b_ncols: " << b_ncols << " b_nrows: " << b_nrows << std::endl;
+  int zero = 0;
+  blacs_get_(&context,&zero, &zero ); // -> Create context
+  std::cout << "ZERO: " << zero << " CONTEXT: " << context << std::endl;
+  char layout='R'; // Block cyclic, Row major processor mapping
+  blacs_gridinit_(&context, &layout, &b_nrows, &b_ncols ); // Context -> Initialize the grid
+  blacs_gridinfo_(&context, &b_nrows, &b_ncols, &b_row, &b_col );
+  std::cout << "BRANK: " << b_rank << " BROW: " << b_row << " BCOL: " << b_col << std::endl;
+
+  // Compute the size of the local phi matrices
+  int izero = 0;
+  // int PHI_size = PHI_rows*PHI_cols;
+  int phi_rows = numroc_( &PHI_rows, &rnb, &b_row, &izero, &b_nrows );
+  int phi_cols = numroc_( &PHI_cols, &cnb, &b_col, &izero, &b_ncols );
+
+  std::cout << "BRANK: " << b_rank << " phi_row: " << phi_rows << " phi_col " << phi_cols << std::endl;
 
-    std::cout << "Total structure count: " << StructureDB::count(config).first << std::endl;
-    std::cout << "Total natoms count: " << StructureDB::count(config).second << std::endl;
+#endif
 
-    StructureDB temp;
-    int nst = temp.add("db.train", 99, 3);
-    std::cout << "Number of structures loaded: " << temp.size() << std::endl;
-    std::cout << "Number of structures loaded: " << nst << std::endl;
-    std::cout << temp << std::endl;
-    std::cout << temp(0) << std::endl;
+  // once we know the size of the PHI, we can assign local phi matrices
 
 
+  for (const std::string &fn : config("DBFILE")) {
+    // get number of structures and tot atoms in all structures
+    std::pair<int,int> dbsize = StructureDB::count(fn);
+  }
 
-    if (is_verbose()) std::cout << "Finding nearest neighbours within: " <<
-        config.get<double>("RCUTMAX") << " cutoff distance..." << std::flush;
-    NNFinder nnf(config);
-    nnf.calc(stdb);
-    if (is_verbose()) std::cout << "Done!" << std::endl;
 
-    if (is_verbose()) std::cout << "Training start..." << std::flush;
+  if (is_verbose()) std::cout << "Loading structures..." << std::flush;
+  StructureDB stdb(config);
+  if (is_verbose()) std::cout << "Done!" << std::endl;
 
 
+  //StructureDB temp;
+  //int nst = temp.add("db.train", 99, 3);
+  //std::cout << "Number of structures loaded: " << temp.size() << std::endl;
+  //std::cout << "Number of structures loaded: " << nst << std::endl;
+  //std::cout << temp << std::endl;
+  //std::cout << temp(0) << std::endl;
 
-    model->train(stdb,dc);
 
-    if (is_verbose()) std::cout << "Done!" << std::endl;
 
-    Config param_file = model->get_param_file();
-    param_file.check_pot_file();
-    std::ofstream outfile;
-    outfile.open ("pot.tadah");
-    outfile << param_file << std::endl;;
+  if (is_verbose()) std::cout << "Finding nearest neighbours within: " <<
+    config.get<double>("RCUTMAX") << " cutoff distance..." << std::flush;
+  NNFinder nnf(config);
+  nnf.calc(stdb);
+  if (is_verbose()) std::cout << "Done!" << std::endl;
+
+  if (is_verbose()) std::cout << "Training start..." << std::flush;
 
-    if(train->count("--uncertainty")) {
-        t_type weights = model->get_weights();
-        t_type unc = model->get_weights_uncertainty();
-        Output(param_file,false).print_train_unc(weights, unc);
-    }
 
-    if (is_verbose()) std::cout << timer_tot.to_string() << std::endl;
 
-    if(model)
-        delete model;
-    if(fb)
-        delete fb;
+  model->train(stdb,dc);
+
+  if (is_verbose()) std::cout << "Done!" << std::endl;
+
+  Config param_file = model->get_param_file();
+  param_file.check_pot_file();
+  std::ofstream outfile;
+  outfile.open ("pot.tadah");
+  outfile << param_file << std::endl;;
+
+  if(train->count("--uncertainty")) {
+    t_type weights = model->get_weights();
+    t_type unc = model->get_weights_uncertainty();
+    Output(param_file,false).print_train_unc(weights, unc);
+  }
+
+  if (is_verbose()) std::cout << timer_tot.to_string() << std::endl;
+
+  if(model)
+    delete model;
+  if(fb)
+    delete fb;
 }
 
 void TadahCLI::subcommand_predict() {
-    CLI::Timer timer_tot {"Prediction", CLI::Timer::Big};
-    if(predict->count("--verbose"))
-        set_verbose();
-    if (is_verbose()) std::cout << "Prediction mode" << std::endl;
-    Config pot_config(pot_file);
-    pot_config.check_for_predict();
-    pot_config.remove("VERBOSE");
-    if(predict->count("--config")) {
-        pot_config.add(config_file);
-        if (!pot_config.exist("VERBOSE"))
-            pot_config.add("VERBOSE",0);
-    }
-    else if(predict->count("--datasets")) {
-        for (auto &ds: datasets) {
-            pot_config.add("DBFILE",ds);
-        }
-        pot_config.add("VERBOSE",0);
-    }
-    else {
-        std::runtime_error("Either provide config file\n\
-                or datasets in a command line");
+  CLI::Timer timer_tot {"Prediction", CLI::Timer::Big};
+  if(predict->count("--verbose"))
+    set_verbose();
+  if (is_verbose()) std::cout << "Prediction mode" << std::endl;
+  Config pot_config(pot_file);
+  pot_config.check_for_predict();
+  pot_config.remove("VERBOSE");
+  if(predict->count("--config")) {
+    pot_config.add(config_file);
+    if (!pot_config.exist("VERBOSE"))
+      pot_config.add("VERBOSE",0);
+  }
+  else if(predict->count("--datasets")) {
+    for (auto &ds: datasets) {
+      pot_config.add("DBFILE",ds);
     }
+    pot_config.add("VERBOSE",0);
+  }
+  else {
+    std::runtime_error("Either provide config file\n\
+        or datasets in a command line");
+  }
 
-    // If force or Stress flag is set add it to config
-    // overwrite existing flags if present.
-    // If flags are not set either keep existing
-    // setting from the config file or set to false
-    if(predict->count("--Force")) {
-        pot_config.remove("FORCE");
-        pot_config.add("FORCE", "true");
-    }
-    if(predict->count("--Stress")) {
-        pot_config.remove("STRESS");
-        pot_config.add("STRESS", "true");
-    }
+  // If force or Stress flag is set add it to config
+  // overwrite existing flags if present.
+  // If flags are not set either keep existing
+  // setting from the config file or set to false
+  if(predict->count("--Force")) {
+    pot_config.remove("FORCE");
+    pot_config.add("FORCE", "true");
+  }
+  if(predict->count("--Stress")) {
+    pot_config.remove("STRESS");
+    pot_config.add("STRESS", "true");
+  }
 
-    DC_Selector DCS(pot_config);
+  DC_Selector DCS(pot_config);
 
-    if (is_verbose()) std::cout << "Loading structures..." << std::flush;
-    StructureDB stdb(pot_config);
-    if (is_verbose()) std::cout << "Done!" << std::endl;
+  if (is_verbose()) std::cout << "Loading structures..." << std::flush;
+  StructureDB stdb(pot_config);
+  if (is_verbose()) std::cout << "Done!" << std::endl;
 
-    if (is_verbose()) std::cout << "Finding nearest neighbours within: " <<
-        pot_config.get<double>("RCUTMAX") << " cutoff distance..." << std::flush;
-    NNFinder nnf(pot_config);
-    nnf.calc(stdb);
-    if (is_verbose()) std::cout << "Done!" << std::endl;
+  if (is_verbose()) std::cout << "Finding nearest neighbours within: " <<
+    pot_config.get<double>("RCUTMAX") << " cutoff distance..." << std::flush;
+  NNFinder nnf(pot_config);
+  nnf.calc(stdb);
+  if (is_verbose()) std::cout << "Done!" << std::endl;
 
-    DescriptorsCalc<> dc(pot_config,*DCS.d2b,*DCS.d3b,*DCS.dmb,
-            *DCS.c2b,*DCS.c3b,*DCS.cmb);
+  DescriptorsCalc<> dc(pot_config,*DCS.d2b,*DCS.d3b,*DCS.dmb,
+      *DCS.c2b,*DCS.c3b,*DCS.cmb);
 
 
 
-    if (is_verbose()) std::cout << "Prediction start..." << std::flush;
-    DM_Function_Base *fb = CONFIG::factory<DM_Function_Base,Config&>(
-            pot_config.get<std::string>("MODEL",1),pot_config);
-    M_Tadah_Base *modelp = CONFIG::factory<M_Tadah_Base,DM_Function_Base&,Config&>(
-            pot_config.get<std::string>("MODEL",0),*fb,pot_config);
+  if (is_verbose()) std::cout << "Prediction start..." << std::flush;
+  DM_Function_Base *fb = CONFIG::factory<DM_Function_Base,Config&>(
+      pot_config.get<std::string>("MODEL",1),pot_config);
+  M_Tadah_Base *modelp = CONFIG::factory<M_Tadah_Base,DM_Function_Base&,Config&>(
+      pot_config.get<std::string>("MODEL",0),*fb,pot_config);
 
-    StructureDB stpred;
-    aed_type2 predicted_error;
-    if (predict->count("--error")) {
-        stpred = modelp->predict(pot_config,stdb,dc,predicted_error);
-    }
-    else {
+  StructureDB stpred;
+  aed_type2 predicted_error;
+  if (predict->count("--error")) {
+    stpred = modelp->predict(pot_config,stdb,dc,predicted_error);
+  }
+  else {
 
-        stpred = modelp->predict(pot_config,stdb,dc);
-    }
+    stpred = modelp->predict(pot_config,stdb,dc);
+  }
 
 
-    if (is_verbose()) std::cout << "Done!" << std::endl;
+  if (is_verbose()) std::cout << "Done!" << std::endl;
 
-    if (is_verbose()) std::cout << "Dumping output..." << std::flush;
+  if (is_verbose()) std::cout << "Dumping output..." << std::flush;
 
-    Output output(pot_config,predict->count("--error"));
-    output.print_predict_all(stdb,stpred,predicted_error);
+  Output output(pot_config,predict->count("--error"));
+  output.print_predict_all(stdb,stpred,predicted_error);
 
-    if (is_verbose()) std::cout << "Done!" << std::endl;
-    if (is_verbose()) std::cout << timer_tot.to_string() << std::endl;
+  if (is_verbose()) std::cout << "Done!" << std::endl;
+  if (is_verbose()) std::cout << timer_tot.to_string() << std::endl;
 
-    if(predict->count("--analytics")) {
-        Analytics a(stdb,stpred);
+  if(predict->count("--analytics")) {
+    Analytics a(stdb,stpred);
 
-        std::cout << "Energy MAE (meV/atom): " << 1000*a.calc_e_mae() << std::endl;
-        std::cout << "Energy RMSE (meV/atom): " << 1000*a.calc_e_rmse() << std::endl;
-        std::cout << "Energy R^2: " << a.calc_e_r_sq() << std::endl;
+    std::cout << "Energy MAE (meV/atom): " << 1000*a.calc_e_mae() << std::endl;
+    std::cout << "Energy RMSE (meV/atom): " << 1000*a.calc_e_rmse() << std::endl;
+    std::cout << "Energy R^2: " << a.calc_e_r_sq() << std::endl;
 
-        if (predict->count("--Force")) {
-            std::cout << "Force MAE (eV/A): "<< a.calc_f_mae() << std::endl;
-            std::cout << "Force RMSE (eV/A): "<< a.calc_f_rmse() << std::endl;
-            std::cout << "Force R^2: " << a.calc_f_r_sq() << std::endl;
-        }
+    if (predict->count("--Force")) {
+      std::cout << "Force MAE (eV/A): "<< a.calc_f_mae() << std::endl;
+      std::cout << "Force RMSE (eV/A): "<< a.calc_f_rmse() << std::endl;
+      std::cout << "Force R^2: " << a.calc_f_r_sq() << std::endl;
+    }
 
-        if (predict->count("--Stress")) {
-            std::cout << "Stress MAE (eV): "<< a.calc_s_mae() << std::endl;
-            std::cout << "Stress RMSE (eV): "<< a.calc_s_rmse() << std::endl;
-            std::cout << "Stress R^2: " << a.calc_s_r_sq() << std::endl;
-        }
+    if (predict->count("--Stress")) {
+      std::cout << "Stress MAE (eV): "<< a.calc_s_mae() << std::endl;
+      std::cout << "Stress RMSE (eV): "<< a.calc_s_rmse() << std::endl;
+      std::cout << "Stress R^2: " << a.calc_s_r_sq() << std::endl;
     }
+  }
 
-    if(modelp)
-        delete modelp;
-    if(fb)
-        delete fb;
+  if(modelp)
+    delete modelp;
+  if(fb)
+    delete fb;
 }
 
 void TadahCLI::subcommand_hpo(
-        [[maybe_unused]]int argc,
-        [[maybe_unused]]char**argv) {
+    [[maybe_unused]]int argc,
+    [[maybe_unused]]char**argv) {
 #ifdef TADAH_ENABLE_HPO
-    CLI::Timer timer_tot {"HPO", CLI::Timer::Big};
-    // the number of processes in MPI_COMM_WORLD
-    int nproc=1;
-    // the rank of this process in MPI_COMM_WORLD
-    int rank=0;
+  CLI::Timer timer_tot {"HPO", CLI::Timer::Big};
+  // the number of processes in MPI_COMM_WORLD
+  int ncpu=1;
+  // the rank of this process in MPI_COMM_WORLD
+  int rank=0;
 
 #ifdef TADAH_BUILD_MPI
-    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Status status;  
+  MPI_Comm_size(MPI_COMM_WORLD, &ncpu);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Status status;  
 #endif
 
-    if(hpo->count("--verbose"))
-        set_verbose();
-    Config config(config_file);
-    config.remove("CHECKPRESS");
-    config.add("CHECKPRESS", "true");
-    config.check_for_training();
-    if (is_verbose()) std::cout << "HPO mode" << std::endl;
-
-    if(hpo->count("--Force")) {
-        config.remove("FORCE");
-        config.add("FORCE", "true");
-    }
-    if(hpo->count("--Stress")) {
-        config.remove("STRESS");
-        config.add("STRESS", "true");
-    }
-    
-    if (hpo->count("--dtargets")) {
-        // list of indices to the trg vec to process by this proc
-        std::vector<int>local_trg_indices;
-
-        // Everyone read files from the provided directory
-        std::vector<fs::path> trg=read_targets(targets_dir);
-
-        // the number of files the process will work on
-        // also the size of an array it will get from the root process.
-        int s;
-        if ( rank == 0 ) {
-            // root proc distributes work equally between available processes
-            // Each process will recieve an array of integers.
-            // Integers correspond to indices in the trg vector
-            // e.g. indices 3,4 indicate that the process
-            // should work on target files trg[3] and trg[4]
-
-            // prep indices array and fill from 0 to trg.size()-1
-            std::vector<int> trg_idx(trg.size());
-            std::iota (std::begin(trg_idx), std::end(trg_idx), 0);
-
-            // Establish the number of target files per process.
-            // The work should be evenly distributed.
-            // e.g.
-            // For 4 processes and 19 files
-            // 5 5 5 4
-            std::vector<int> counts(nproc,0);
-            for (size_t i=0;i<trg.size();++i) {
-                counts[i%nproc]++;
-            }
-
-            // Keep first index to sent to each proc
-            // For example above 0,5,10,15
-            std::vector<int> first_idx(nproc);
-            int sum=0;
-            for (int i=0;i<nproc; ++i) {
-                first_idx[i]=sum;
-                sum+=counts[i];
-            }
-
-            // First send expected size of a chunk
-            for (int p = 1; p < nproc; p++ ){
-                s=counts[p];
+  if(hpo->count("--verbose"))
+    set_verbose();
+  Config config(config_file);
+  config.remove("CHECKPRESS");
+  config.add("CHECKPRESS", "true");
+  config.check_for_training();
+  if (is_verbose()) std::cout << "HPO mode" << std::endl;
+
+  if(hpo->count("--Force")) {
+    config.remove("FORCE");
+    config.add("FORCE", "true");
+  }
+  if(hpo->count("--Stress")) {
+    config.remove("STRESS");
+    config.add("STRESS", "true");
+  }
+
+  if (hpo->count("--dtargets")) {
+    // list of indices to the trg vec to process by this proc
+    std::vector<int>local_trg_indices;
+
+    // Everyone read files from the provided directory
+    std::vector<fs::path> trg=read_targets(targets_dir);
+
+    // the number of files the process will work on
+    // also the size of an array it will get from the root process.
+    int s;
+    if ( rank == 0 ) {
+      // root proc distributes work equally between available processes
+      // Each process will recieve an array of integers.
+      // Integers correspond to indices in the trg vector
+      // e.g. indices 3,4 indicate that the process
+      // should work on target files trg[3] and trg[4]
+
+      // prep indices array and fill from 0 to trg.size()-1
+      std::vector<int> trg_idx(trg.size());
+      std::iota (std::begin(trg_idx), std::end(trg_idx), 0);
+
+      // Establish the number of target files per process.
+      // The work should be evenly distributed.
+      // e.g.
+      // For 4 processes and 19 files
+      // 5 5 5 4
+      std::vector<int> counts(ncpu,0);
+      for (size_t i=0;i<trg.size();++i) {
+        counts[i%ncpu]++;
+      }
+
+      // Keep first index to sent to each proc
+      // For example above 0,5,10,15
+      std::vector<int> first_idx(ncpu);
+      int sum=0;
+      for (int i=0;i<ncpu; ++i) {
+        first_idx[i]=sum;
+        sum+=counts[i];
+      }
+
+      // First send expected size of a chunk
+      for (int p = 1; p < ncpu; p++ ){
+        s=counts[p];
 #ifdef TADAH_BUILD_MPI
-                MPI_Send ( &s, 1, MPI_INT, p, 99, MPI_COMM_WORLD );
+        MPI_Send ( &s, 1, MPI_INT, p, 99, MPI_COMM_WORLD );
 #endif
-            }
+      }
 
-            // and prepare root process for its own work
-            s=counts[rank];
-            local_trg_indices.resize(s);
+      // and prepare root process for its own work
+      s=counts[rank];
+      local_trg_indices.resize(s);
 #ifdef TADAH_BUILD_MPI
-            MPI_Scatterv(trg_idx.data(), counts.data(), first_idx.data(),
-                    MPI_INT, local_trg_indices.data(), s, MPI_INT, 0, MPI_COMM_WORLD);
+      MPI_Scatterv(trg_idx.data(), counts.data(), first_idx.data(),
+          MPI_INT, local_trg_indices.data(), s, MPI_INT, 0, MPI_COMM_WORLD);
 #endif
-        }
-        else  {
+    }
+    else  {
 #ifdef TADAH_BUILD_MPI
-            // Get the size of work to be done
-            MPI_Recv ( &s, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD, &status );
+      // Get the size of work to be done
+      MPI_Recv ( &s, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD, &status );
 #endif
-            // We know the amount of work, so can resize array
-            local_trg_indices.resize(s);
+      // We know the amount of work, so can resize array
+      local_trg_indices.resize(s);
 
 
 #ifdef TADAH_BUILD_MPI
-            // Finally get indices to the trg array
-            MPI_Scatterv(NULL, NULL, NULL, MPI_INT, local_trg_indices.data(),
-                    s, MPI_INT, 0, MPI_COMM_WORLD);
+      // Finally get indices to the trg array
+      MPI_Scatterv(NULL, NULL, NULL, MPI_INT, local_trg_indices.data(),
+          s, MPI_INT, 0, MPI_COMM_WORLD);
 #endif
-        }
-
-        // Finally, we can start working on each target file assigned.
-        // But first we have to create sensible directory structure
-        // where we can dump all the output. 
-        // The user provides the outdir which we assume that is empty 
-        // and create subdirectories which corresponds to the names
-        // of the target files. All paths should be absolute.
-        // Note that we do not parallelise here over threads
-        // because parallelisation is done elsewhere.
- 
-        // Keep program current working directory
-        fs::path cwd = fs::absolute(fs::current_path());
-
-        // Prepare outdir path
-        fs::path outdir = cwd.append(targets_out_dir);
-
-        for (const int idx: local_trg_indices) {
-            // Build an absolute path for target computations
-            // inside the user specified outdir.
-            // Use the name of the target file without extenstion
-            fs::path outdir_target = outdir;
-            outdir_target /= trg[idx].filename().replace_extension("");
-
-            // Create output dir for trg[idx] target
-            std::filesystem::create_directory(outdir_target);
-
-            // Copy target file to its output directory
-            // so the user has a copy for future reference
-            fs::copy(fs::absolute(trg[idx]),outdir_target);
-
-            // Change to target working directory
-            std::filesystem::current_path(outdir_target);
-
-            // Get the name for a target file
-            std::string target_file = trg[idx].filename();
-
-            // Run computation
-            hpo_run(config, target_file);
-
-            // Just in case return to where we started
-            std::filesystem::current_path(cwd);
-        }
     }
-    else if(hpo->count("--target")) {
-        hpo_run(config, target_file);
+
+    // Finally, we can start working on each target file assigned.
+    // But first we have to create sensible directory structure
+    // where we can dump all the output. 
+    // The user provides the outdir which we assume that is empty 
+    // and create subdirectories which corresponds to the names
+    // of the target files. All paths should be absolute.
+    // Note that we do not parallelise here over threads
+    // because parallelisation is done elsewhere.
+
+    // Keep program current working directory
+    fs::path cwd = fs::absolute(fs::current_path());
+
+    // Prepare outdir path
+    fs::path outdir = cwd.append(targets_out_dir);
+
+    for (const int idx: local_trg_indices) {
+      // Build an absolute path for target computations
+      // inside the user specified outdir.
+      // Use the name of the target file without extenstion
+      fs::path outdir_target = outdir;
+      outdir_target /= trg[idx].filename().replace_extension("");
+
+      // Create output dir for trg[idx] target
+      std::filesystem::create_directory(outdir_target);
+
+      // Copy target file to its output directory
+      // so the user has a copy for future reference
+      fs::copy(fs::absolute(trg[idx]),outdir_target);
+
+      // Change to target working directory
+      std::filesystem::current_path(outdir_target);
+
+      // Get the name for a target file
+      std::string target_file = trg[idx].filename();
+
+      // Run computation
+      hpo_run(config, target_file);
+
+      // Just in case return to where we started
+      std::filesystem::current_path(cwd);
     }
+  }
+  else if(hpo->count("--target")) {
+    hpo_run(config, target_file);
+  }
 
-    if (is_verbose()) std::cout << timer_tot.to_string() << std::endl;
+  if (is_verbose()) std::cout << timer_tot.to_string() << std::endl;
 #else
-    std::cout << "-----------------------------------------------" << std::endl;
-    std::cout << "This subcommand is not supported by this build." << std::endl;
-    std::cout << "Ta-dah! Must by compiled with HPO support." << std::endl;
-    std::cout << "See documentation for details." << std::endl;
-    std::cout << "-----------------------------------------------" << std::endl;
+  std::cout << "-----------------------------------------------" << std::endl;
+  std::cout << "This subcommand is not supported by this build." << std::endl;
+  std::cout << "Ta-dah! Must by compiled with HPO support." << std::endl;
+  std::cout << "See documentation for details." << std::endl;
+  std::cout << "-----------------------------------------------" << std::endl;
 #endif
 }
 bool TadahCLI::is_verbose() {
-    return verbose;
+  return verbose;
 }
 void TadahCLI::set_verbose() {
-    verbose=true;
+  verbose=true;
 }
 void TadahCLI::flag_version() {
-    int v_world = TADAH_WORLD_VERSION;
-    int v_major = TADAH_MAJOR_VERSION;
-    int v_minor = TADAH_MINOR_VERSION;
-    std::string v= "ta-dah version " + std::to_string(v_world)
-        + "." + std::to_string(v_major) + "." + std::to_string(v_minor);
-    std::cout << v << std::endl;
+  int v_world = TADAH_WORLD_VERSION;
+  int v_major = TADAH_MAJOR_VERSION;
+  int v_minor = TADAH_MINOR_VERSION;
+  std::string v= "ta-dah version " + std::to_string(v_world)
+    + "." + std::to_string(v_major) + "." + std::to_string(v_minor);
+  std::cout << v << std::endl;
 }
 std::vector<fs::path> TadahCLI::read_targets(std::string path) {
-    std::vector<fs::path> s=read_files(path);
-    if(s.size()==0) 
-        throw std::runtime_error(
-                "No files found in the targets directory: "
-                +targets_dir+"\n");
+  std::vector<fs::path> s=read_files(path);
+  if(s.size()==0) 
+    throw std::runtime_error(
+        "No files found in the targets directory: "
+        +targets_dir+"\n");
 
-    return s;
+  return s;
 }
 std::vector<fs::path> TadahCLI::read_files(std::string path) {
-    std::vector<fs::path> s;
-    for (const auto & entry : fs::directory_iterator(path))
-        if(fs::is_regular_file(entry))
-            s.push_back(fs::absolute(entry.path()));
-    return s;
+  std::vector<fs::path> s;
+  for (const auto & entry : fs::directory_iterator(path))
+    if(fs::is_regular_file(entry))
+      s.push_back(fs::absolute(entry.path()));
+  return s;
 }
 
 TadahCLI::TadahCLI():
@@ -421,164 +501,164 @@ TadahCLI::TadahCLI():
   app.add_flag("-v, --version",
       "Print version number and exit");
 
-/*---------------------------------------------------------------------------*/
-/*     Training                                                              */
-/*---------------------------------------------------------------------------*/
-	ss.str(std::string());
-    ss << "Train model using a config file.\n";
-
-    train = app.add_subcommand("train", ss.str());
-
-	ss.str(std::string());
-	ss << "A config file containing all model parameters\n";
-	ss << "and training dataset(s).\n";
-	ss << "See documentation for more details.\n";
-    train->add_option("-c,--config", config_file, ss.str())
-        ->option_text("CONFIG_FILE")
-        ->check(CLI::ExistingFile)
-        ->required();
-
-    train->add_flag("-F,--Force", "Train with forces.");
-    train->add_flag("-S,--Stress", "Train with stresses.");
-    train->add_flag("-v,--verbose", "Verbose mode on.");
-    train->add_flag("-u,--uncertainty", "Dump uncertainty on weights.");
-
-
-/*---------------------------------------------------------------------------*/
-/*     Prediction                                                            */
-/*---------------------------------------------------------------------------*/
-	ss.str(std::string());
-	ss << "Predict using already trained model.\n";
-	ss << "Energy per atom is always calculated.\n";
-	ss << "Calculation of forces and stresses is optional.\n\n";
-	ss << "Option 1:\n Use the config file with\n";
-	ss << "with DBFILE, FORCE and STRESS keys.\n";
-	ss << "Note: --Force and --Stress flags take\n";
-	ss << "priority over config keys.\n";
-	ss << "-e.g. ta-dah predict -c config.tadah -p pot.tadah\n\n";
-	ss << "Option 2:\n Provide datasets and flags in the command line.\n";
-	ss << "-e.g. ta-dah predict -p pot-tadah -FS -d db1.tadah\n";
-
-    predict = app.add_subcommand("predict", ss.str());
-
-	ss.str(std::string());
-	ss << "A config file containing prediction dataset(s).\n";
-	ss << "Required Config KEYS: DBFILE.\n";
-	ss << "Optional Config KEYS: FORCE, STRESS.\n";
-	ss << "See documentation for more details.\n";
-    predict->add_option("-c,--config", config_file,ss.str())
-        ->check(CLI::ExistingFile)
-        ->option_text("CONFIG_FILE");
-
-
-    predict->add_option("-d,--datasets", datasets,
-            "One or more datasets for prediction.")
-        ->option_text("DATASETS ...")
-        ->excludes("-c")
-        ->check(CLI::ExistingFile);
-
-    predict->add_flag("-F,--Force", "Predict forces.");
-    predict->add_flag("-S,--Stress", "Predict stresses.");
-    predict->add_flag("-v,--verbose", "Verbose mode on.");
-    predict->add_flag("-e,--error", "Predict model error.");
-    predict->add_flag("-a,--analytics",
-            "Compare predicted values to those in the original dataset.");
-
-    std::string pot_opt="-p,--potential";
-    predict->add_option(pot_opt, pot_file,
-            "Trained model")
-        ->option_text("POT_FILE")
-        ->check(CLI::ExistingFile)
-        ->required();
-    
-/*---------------------------------------------------------------------------*/
-/*     Hyperparameter Optimizer                                              */
-/*---------------------------------------------------------------------------*/
-    ss.str(std::string());
+  /*---------------------------------------------------------------------------*/
+  /*     Training                                                              */
+  /*---------------------------------------------------------------------------*/
+  ss.str(std::string());
+  ss << "Train model using a config file.\n";
+
+  train = app.add_subcommand("train", ss.str());
+
+  ss.str(std::string());
+  ss << "A config file containing all model parameters\n";
+  ss << "and training dataset(s).\n";
+  ss << "See documentation for more details.\n";
+  train->add_option("-c,--config", config_file, ss.str())
+    ->option_text("CONFIG_FILE")
+    ->check(CLI::ExistingFile)
+    ->required();
+
+  train->add_flag("-F,--Force", "Train with forces.");
+  train->add_flag("-S,--Stress", "Train with stresses.");
+  train->add_flag("-v,--verbose", "Verbose mode on.");
+  train->add_flag("-u,--uncertainty", "Dump uncertainty on weights.");
+
+
+  /*---------------------------------------------------------------------------*/
+  /*     Prediction                                                            */
+  /*---------------------------------------------------------------------------*/
+  ss.str(std::string());
+  ss << "Predict using already trained model.\n";
+  ss << "Energy per atom is always calculated.\n";
+  ss << "Calculation of forces and stresses is optional.\n\n";
+  ss << "Option 1:\n Use the config file with\n";
+  ss << "with DBFILE, FORCE and STRESS keys.\n";
+  ss << "Note: --Force and --Stress flags take\n";
+  ss << "priority over config keys.\n";
+  ss << "-e.g. ta-dah predict -c config.tadah -p pot.tadah\n\n";
+  ss << "Option 2:\n Provide datasets and flags in the command line.\n";
+  ss << "-e.g. ta-dah predict -p pot-tadah -FS -d db1.tadah\n";
+
+  predict = app.add_subcommand("predict", ss.str());
+
+  ss.str(std::string());
+  ss << "A config file containing prediction dataset(s).\n";
+  ss << "Required Config KEYS: DBFILE.\n";
+  ss << "Optional Config KEYS: FORCE, STRESS.\n";
+  ss << "See documentation for more details.\n";
+  predict->add_option("-c,--config", config_file,ss.str())
+    ->check(CLI::ExistingFile)
+    ->option_text("CONFIG_FILE");
+
+
+  predict->add_option("-d,--datasets", datasets,
+      "One or more datasets for prediction.")
+    ->option_text("DATASETS ...")
+    ->excludes("-c")
+    ->check(CLI::ExistingFile);
+
+  predict->add_flag("-F,--Force", "Predict forces.");
+  predict->add_flag("-S,--Stress", "Predict stresses.");
+  predict->add_flag("-v,--verbose", "Verbose mode on.");
+  predict->add_flag("-e,--error", "Predict model error.");
+  predict->add_flag("-a,--analytics",
+      "Compare predicted values to those in the original dataset.");
+
+  std::string pot_opt="-p,--potential";
+  predict->add_option(pot_opt, pot_file,
+      "Trained model")
+    ->option_text("POT_FILE")
+    ->check(CLI::ExistingFile)
+    ->required();
+
+  /*---------------------------------------------------------------------------*/
+  /*     Hyperparameter Optimizer                                              */
+  /*---------------------------------------------------------------------------*/
+  ss.str(std::string());
 #ifndef TADAH_ENABLE_HPO
-    ss << "(UNAVAILABLE)" << std::endl;
+  ss << "(UNAVAILABLE)" << std::endl;
 #endif
-    ss << "Optimise model's architecture.\n";
-    ss << "Find the best set of hyperparameters (HPs)\n";
-    ss << "subject to predefined constraints\n";
-    ss << "Minimises custom made loss function.\n";
+  ss << "Optimise model's architecture.\n";
+  ss << "Find the best set of hyperparameters (HPs)\n";
+  ss << "subject to predefined constraints\n";
+  ss << "Minimises custom made loss function.\n";
 
-    hpo = app.add_subcommand("hpo", ss.str());
+  hpo = app.add_subcommand("hpo", ss.str());
 
-	ss.str(std::string());
-	ss << "A config file containing all model parameters\n";
-	ss << "and training dataset(s).\n";
-	ss << "The model parameters will be optimised\n";
-	ss << "within constraints defined in the -t target file\n";
-	ss << "See documentation for more details.\n";
+  ss.str(std::string());
+  ss << "A config file containing all model parameters\n";
+  ss << "and training dataset(s).\n";
+  ss << "The model parameters will be optimised\n";
+  ss << "within constraints defined in the -t target file\n";
+  ss << "See documentation for more details.\n";
 
 #ifdef TADAH_ENABLE_HPO
-    auto c_opt=hpo->add_option("-c,--config", config_file, ss.str())
-        ->option_text("CONFIG_FILE")
-        ->check(CLI::ExistingFile)
-        ->required();
-
-	ss.str(std::string());
-	ss << "A config file containing model's targets\n";
-	ss << "and hyperparameters constraints.\n";
-	ss << "See documentation for more details.\n";
-    auto t_opt=hpo->add_option("-t,--target", target_file, ss.str())
-        ->option_text("TARGET_FILE")
-        ->check(CLI::ExistingFile);
-
-	ss.str(std::string());
-    ss << "Output directory for -d option.\n";
-    auto o_opt=hpo->add_option("-o,--out_dir", targets_out_dir, ss.str())
-        ->option_text("OUTPUT_DIRECTORY")
-        ->check(CLI::ExistingDirectory)
-        ->excludes(t_opt)
-        ->needs(c_opt);
-
-	ss.str(std::string());
-	ss << "A direcory containing model's target files.\n";
-	ss << "Each target file must contain a set of\n";
-    ss << "hyperparameter constraints as in the -t option.\n";
-    ss << "This option can be run with MPI.\n";
-    ss << "Each MPI process will run independent\n";
-    ss << "optimisation for every target in the directory\n";
-    ss << "resulting in N models for N targets.\n";
-	ss << "See documentation for more details.\n";
-    hpo->add_option("-d,--dtargets", targets_dir, ss.str())
-        ->option_text("TARGETS_DIRECTORY")
-        ->check(CLI::ExistingDirectory)
-        ->excludes(t_opt)
-        ->needs(c_opt)
-        ->needs(o_opt);
-
-
-    hpo->add_flag("-F,--Force", "Train with forces.");
-    hpo->add_flag("-S,--Stress", "Train with stresses.");
-    hpo->add_flag("-v,--verbose", "Verbose mode on.");
-    //hpo->add_flag("-u,--uncertainty",
-    //        "Dump uncertainty on weights."); // TODO check this
+  auto c_opt=hpo->add_option("-c,--config", config_file, ss.str())
+    ->option_text("CONFIG_FILE")
+    ->check(CLI::ExistingFile)
+    ->required();
+
+  ss.str(std::string());
+  ss << "A config file containing model's targets\n";
+  ss << "and hyperparameters constraints.\n";
+  ss << "See documentation for more details.\n";
+  auto t_opt=hpo->add_option("-t,--target", target_file, ss.str())
+    ->option_text("TARGET_FILE")
+    ->check(CLI::ExistingFile);
+
+  ss.str(std::string());
+  ss << "Output directory for -d option.\n";
+  auto o_opt=hpo->add_option("-o,--out_dir", targets_out_dir, ss.str())
+    ->option_text("OUTPUT_DIRECTORY")
+    ->check(CLI::ExistingDirectory)
+    ->excludes(t_opt)
+    ->needs(c_opt);
+
+  ss.str(std::string());
+  ss << "A direcory containing model's target files.\n";
+  ss << "Each target file must contain a set of\n";
+  ss << "hyperparameter constraints as in the -t option.\n";
+  ss << "This option can be run with MPI.\n";
+  ss << "Each MPI process will run independent\n";
+  ss << "optimisation for every target in the directory\n";
+  ss << "resulting in N models for N targets.\n";
+  ss << "See documentation for more details.\n";
+  hpo->add_option("-d,--dtargets", targets_dir, ss.str())
+    ->option_text("TARGETS_DIRECTORY")
+    ->check(CLI::ExistingDirectory)
+    ->excludes(t_opt)
+    ->needs(c_opt)
+    ->needs(o_opt);
+
+
+  hpo->add_flag("-F,--Force", "Train with forces.");
+  hpo->add_flag("-S,--Stress", "Train with stresses.");
+  hpo->add_flag("-v,--verbose", "Verbose mode on.");
+  //hpo->add_flag("-u,--uncertainty",
+  //        "Dump uncertainty on weights."); // TODO check this
 #endif
 }
 
 void TadahCLI::run(int argc, char** argv) {
 
-    if(app.count_all()==1) {
-        std::cout << "Type: `ta-dah -h` for help" << std::endl;
-    }
-
-    if (app.count("--version")) {
-        flag_version();
-    }
-    // HPO
-    if (hpo->count_all()) {
-        subcommand_hpo(argc,argv);
-    }
-    // TRAINING
-    if (train->count_all()) {
-        subcommand_train();
-    }
-
-    // PREDICTION
-    else if (predict->count_all()) {
-        subcommand_predict();
-    }
+  if(app.count_all()==1) {
+    std::cout << "Type: `ta-dah -h` for help" << std::endl;
+  }
+
+  if (app.count("--version")) {
+    flag_version();
+  }
+  // HPO
+  if (hpo->count_all()) {
+    subcommand_hpo(argc,argv);
+  }
+  // TRAINING
+  if (train->count_all()) {
+    subcommand_train();
+  }
+
+  // PREDICTION
+  else if (predict->count_all()) {
+    subcommand_predict();
+  }
 }
-- 
GitLab