Commit 696a2efa authored by Marcin Kirsz

Introduced custom data types to allow sending rows of a Phi matrix which is stored in column-major order.
parent 346a4f6b
2 merge requests: !5 MPI version of tadah, !3 MPI version of Tadah
Pipeline #37488 failed
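The custom data type referred to in the commit message is an MPI_Type_vector whose stride equals the number of locally stored rows, with its extent resized to a single double so that several consecutive rows of a column-major matrix can be sent with an ordinary count argument. The following self-contained sketch illustrates the idea outside of the Tadah sources; the names (phi, phi_rows, phi_cols) are chosen for the example and only mirror the rowvec/rowvecs construction that appears in the diff below.

// Minimal sketch (not part of the commit): one row of a column-major
// phi_rows x phi_cols matrix described as an MPI datatype, with the extent
// resized to sizeof(double) so that row k starts at &phi[k] and a count of
// consecutive rows can be sent or received in a single call.
#include <mpi.h>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int phi_rows = 4, phi_cols = 3;
  std::vector<double> phi(phi_rows * phi_cols, 0.0);        // column-major storage
  if (rank == 0)
    for (int j = 0; j < phi_cols; ++j)
      for (int i = 0; i < phi_rows; ++i)
        phi[j * phi_rows + i] = 100.0 * i + j;              // value encodes (row, column)

  // One row = phi_cols blocks of 1 element, separated by a stride of phi_rows elements.
  MPI_Datatype rowvec, rowvecs;
  MPI_Type_vector(phi_cols, 1, phi_rows, MPI_DOUBLE, &rowvec);
  MPI_Type_create_resized(rowvec, 0, sizeof(double), &rowvecs);
  MPI_Type_commit(&rowvecs);

  if (size >= 2) {                                          // ship rows 1 and 2 from rank 0 to rank 1
    if (rank == 0)      MPI_Send(&phi[1], 2, rowvecs, 1, 0, MPI_COMM_WORLD);
    else if (rank == 1) MPI_Recv(&phi[1], 2, rowvecs, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
  }

  MPI_Type_free(&rowvecs);
  MPI_Type_free(&rowvec);
  MPI_Finalize();
  return 0;
}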
@@ -18,8 +18,6 @@
 #include <iostream>
 #include <stdexcept>
 #include <chrono>
 #include <thread>
@@ -28,20 +26,16 @@ extern "C" void blacs_get_(int*, int*, int*);
 extern "C" void blacs_pinfo_(int*, int*);
 extern "C" void blacs_gridinit_(int*, char*, int*, int*);
 extern "C" void blacs_gridinfo_(int*, int*, int*, int*, int*);
-extern "C" void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*);
+extern "C" void descinit_(int*, int*, int*, int*, int*,
+                          int*, int*, int*, int*, int*);
 extern "C" void pdpotrf_(char*, int*, double*, int*, int*, int*, int*);
 extern "C" void blacs_gridexit_(int*);
 extern "C" int numroc_(int*, int*, int*, int*, int*);
-// (transa, m, n, nrhs, a, ia, ja, desc_a, b, ib, jb, desc_b, work, lwork, info);
-extern "C" void pdgels_(char* trans, int* m, int* n, int* nrhs, double* a, int* ia, int* ja, int* desca,
-                        double* b, int* ib, int* jb, int* descb, double* work, int* lwork, int* info);
-//extern "C" void pdgels_(char*, int*, int*, int*,double*,int*,int*,int*,double*,int*,int*,int*,double*, int*, int*);
+extern "C" void pdgels_(char* trans, int* m, int* n, int* nrhs,
+                        double* a, int* ia, int* ja, int* desca, double* b, int* ib,
+                        int* jb, int* descb, double* work, int* lwork, int* info);
 extern "C" void pdgemr2d_(int *m, int *n, double *a, int *ia, int *ja, int *desca,
                           double *b, int *ib, int *jb, int *descb, int *context);
 #endif
 void TadahCLI::subcommand_train() {
@@ -62,6 +56,21 @@ void TadahCLI::subcommand_train() {
 /* MPI CODE:
  * The PHI matrix is divided into local phi matrices.
  *
+ * The code follows a host-worker pattern, where the host (rank 0)
+ * distributes work among the remaining workers (rank > 0).
+ * The work is distributed in packages.
+ * Each package contains: the filename, the first structure to be read
+ * and the number of structures to process.
+ *
+ * The host does not calculate phi but still allocates memory for it.
+ * Each worker fills its local phi.
+ * Once its local phi matrix is full, a worker asks the host
+ * for a rank to which it can send the excess data.
+ * The host first tries to fill its own local phi.
+ * Once the host phi matrix is full, the host searches for a worker
+ * with free space in its local phi.
+ *
+ *
 * 1. each rank reads config file
 * 2. host calculates total number of structures
 * 3. host calculates the dimensions of the PHI matrix
@@ -71,7 +80,6 @@ void TadahCLI::subcommand_train() {
 * 6. Each worker allocates memory for a local matrix phi
 */
 CLI::Timer timer_tot {"Training", CLI::Timer::Big};
 if(train->count("--verbose"))
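The comment block above describes the hand-off protocol whose sends and receives are spread over several hunks further down. The sketch below condenses the worker side of that exchange, assuming a DATA_TAG message tag and the host at rank 0 as in this file; the function name and the use of contiguous MPI_DOUBLE rows (instead of the resized row datatype introduced by this commit) are simplifications for illustration only.

// Illustrative sketch only (not code from the commit): the DATA_TAG hand-off a
// worker performs once its local phi is full. Rows are sent as contiguous
// doubles here; the commit itself sends them with the resized row datatype.
#include <mpi.h>
#include <cstddef>

void offload_remaining_rows(int rows_needed, int phi_cols, const double* rows) {
  const int DATA_TAG = 2;  // placeholder tag value for this sketch
  MPI_Status status;
  while (rows_needed > 0) {
    // Ask the host (rank 0) where the remaining rows can go.
    MPI_Send(&rows_needed, 1, MPI_INT, 0, DATA_TAG, MPI_COMM_WORLD);
    // Host answers with a destination rank and how many rows it will accept.
    int dest = 0, rows_accepted = 0;
    MPI_Recv(&dest, 1, MPI_INT, 0, DATA_TAG, MPI_COMM_WORLD, &status);
    MPI_Recv(&rows_accepted, 1, MPI_INT, 0, DATA_TAG, MPI_COMM_WORLD, &status);
    // Ship that many rows to the destination (the host or another worker).
    MPI_Send(rows, rows_accepted * phi_cols, MPI_DOUBLE, dest, DATA_TAG, MPI_COMM_WORLD);
    rows += static_cast<std::ptrdiff_t>(rows_accepted) * phi_cols;
    rows_needed -= rows_accepted;
  }
}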
@@ -129,44 +137,72 @@ void TadahCLI::subcommand_train() {
 std::cout << "PHI ROWS: " << PHI_rows << std::endl;
 std::cout << "PHI COLS: " << PHI_cols << std::endl;
-// the number of rows must by divisable by the b_nrows
-// the number of cols must by divisable by the b_ncols
 // Initialize BLACS
+// We create two contexts.
+// context1 is used for the computation of phi matrices
+// context2 is used for distribution of local phi to "block cyclic phi"
 int b_rank;
-int b_row;
-int b_col;
-int context;
-int b_nrows; // Number of row procs
-int b_ncols; // Number of column procs
-int rnb = 1; // Row block size TODO
-int cnb = PHI_cols; // Column block size TODO
+int context1;
+int context2;
+int b_row1;
+int b_col1;
+int b_row2;
+int b_col2;
+int b_nrows1; // Number of row procs
+int b_ncols1; // Number of column procs
+int b_nrows2; // Number of row procs
+int b_ncols2; // Number of column procs
+int rnb1 = 1; // Row block size TODO
+int rnb2 = 1; // Row block size TODO
+int cnb1 = PHI_cols; // Column block size TODO
+int cnb2 = 1; // Column block size TODO
 blacs_pinfo_(&b_rank, &ncpu) ; // BLACS rank and world size
 //std::cout << "RANK: " << rank << " BRANK: " << b_rank << " ncpu: " << ncpu << std::endl;
-b_ncols = 1;
-b_nrows = ncpu;
-assert(b_nrows * b_ncols == ncpu);
+b_ncols1 = 1;
+b_nrows1 = ncpu;
+assert(b_nrows1 * b_ncols1 == ncpu);
+b_ncols2 = 2;
+b_nrows2 = ncpu / b_ncols2;
+assert(b_nrows2 * b_ncols2 == ncpu);
-//std::cout << "b_ncols: " << b_ncols << " b_nrows: " << b_nrows << std::endl;
-int zero = 0;
-blacs_get_(&zero,&zero, &context ); // -> Create context
-//std::cout << "ZERO: " << zero << " CONTEXT: " << context << std::endl;
-char layout='R'; // Block cyclic, Row major processor mapping
-blacs_gridinit_(&context, &layout, &b_nrows, &b_ncols ); // Context -> Initialize the grid
-blacs_gridinfo_(&context, &b_nrows, &b_ncols, &b_row, &b_col );
-//std::cout << "BRANK: " << b_rank << " BROW: " << b_row << " BCOL: " << b_col << std::endl;
-printf( "rank = %d, nprow = %d, npcol = %d, myrow = %d, mycol = %d\n", rank, b_nrows, b_ncols, b_row, b_col );
-// Compute the size of the local phi matrices
+//std::cout << "b_ncols1: " << b_ncols1 << " b_nrows1: " << b_nrows1 << std::endl;
 int izero = 0;
 int ione = 1;
+char layout='R'; // Block cyclic, Row major processor mapping
+char layout2='R'; // Block cyclic, Row major processor mapping
+// Create first context
+blacs_get_(&izero,&izero, &context1 ); // -> Create context1
+blacs_gridinit_(&context1, &layout, &b_nrows1, &b_ncols1 ); // context1 -> Initialize the grid
+blacs_gridinfo_(&context1, &b_nrows1, &b_ncols1, &b_row1, &b_col1 );
+//
+// Create second context
+blacs_get_(&izero,&izero, &context2 ); // -> Create context2
+blacs_gridinit_(&context2, &layout2, &b_nrows2, &b_ncols2 ); // context2 -> Initialize the grid
+blacs_gridinfo_(&context2, &b_nrows2, &b_ncols2, &b_row2, &b_col2 );
+//std::cout << "BRANK: " << b_rank << " BROW: " << b_row1 << " BCOL: " << b_col1 << std::endl;
+printf( "rank = %d, nprow = %d, npcol = %d, myrow = %d, mycol = %d\n", rank, b_nrows1, b_ncols1, b_row1, b_col1 );
+std::cout << "izero: " << izero << " context1: " << context1 << std::endl;
+std::cout << "ione: " << ione << " context2: " << context2 << std::endl;
+// Compute the size of the local phi matrices
 // int PHI_size = PHI_rows*PHI_cols;
-int phi_rows = numroc_( &PHI_rows, &rnb, &b_row, &izero, &b_nrows );
-int phi_cols = numroc_( &PHI_cols, &cnb, &b_col, &izero, &b_ncols );
+int phi_rows = numroc_( &PHI_rows, &rnb1, &b_row1, &izero, &b_nrows1 );
+int phi_rows2 = numroc_( &PHI_rows, &rnb2, &b_row2, &izero, &b_nrows2 );
+int phi_cols = numroc_( &PHI_cols, &cnb1, &b_col1, &izero, &b_ncols1 );
+int phi_cols2 = numroc_( &PHI_cols, &cnb2, &b_col2, &izero, &b_ncols2 );
 std::cout << "BRANK: " << b_rank << " phi_rows: " << phi_rows << " phi_col " << phi_cols << std::endl;
+// Define MPI datatype to send rows of the PHI matrix, which is stored in column-major order
+MPI_Datatype rowvec, rowvecs;
+MPI_Type_vector( phi_cols, 1, phi_rows, MPI_DOUBLE, &rowvec);
+MPI_Type_commit(&rowvec);
+MPI_Type_create_resized(rowvec, 0, 1*sizeof(double), &rowvecs);
+MPI_Type_commit(&rowvecs);
 // COUNTERS
 size_t phi_row = 0; // next row to be filled in the local phi array
 int rows_available=phi_rows; // number of available rows in the local phi array
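numroc_ answers the question of how many rows or columns of the globally distributed PHI matrix end up on a given process of the grid. As a reference for readers without the ScaLAPACK sources at hand, the block below restates the standard NUMROC formula in plain C++; it is an illustration of the block-cyclic distribution rule, not the routine linked by the code above.

// Plain C++ illustration of ScaLAPACK's NUMROC: the number of rows (or columns)
// of an n-sized dimension, distributed in blocks of nb over nprocs processes,
// that are owned by process iproc (isrcproc holds the first block).
#include <iostream>

int numroc(int n, int nb, int iproc, int isrcproc, int nprocs) {
  int mydist   = (nprocs + iproc - isrcproc) % nprocs; // my distance from the source process
  int nblocks  = n / nb;                               // number of whole blocks
  int result   = (nblocks / nprocs) * nb;              // full "rounds" every process receives
  int extrablk = nblocks % nprocs;                     // left-over whole blocks
  if (mydist < extrablk)       result += nb;           // I own one of the left-over blocks
  else if (mydist == extrablk) result += n % nb;       // I own the final partial block
  return result;
}

int main() {
  // e.g. PHI_rows = 10 rows, row block size rnb1 = 1, 4 row processes:
  for (int p = 0; p < 4; ++p)
    std::cout << "process " << p << " owns " << numroc(10, 1, p, 0, 4) << " rows\n";
  // prints 3, 3, 2, 2
}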
@@ -182,7 +218,7 @@ void TadahCLI::subcommand_train() {
 // HOST: prepare work packages
 // filename, first structure index, number of structures to read
 std::vector<std::tuple<std::string,int,int>> wpckgs;
-int nstruc = 73; // TODO the number of structures in a single work package
+int nstruc = 10; // TODO the number of structures in a single work package
 for (const std::string &fn : config("DBFILE")) {
   // get number of structures
   int dbsize = StructureDB::count(fn).first;
@@ -247,13 +283,19 @@ void TadahCLI::subcommand_train() {
 MPI_Send (&b_rank, 1, MPI_INT, worker, tag, MPI_COMM_WORLD);
 MPI_Send (&rows_accepted, 1, MPI_INT, worker, tag, MPI_COMM_WORLD);
+// this is actually necessary for a worker to know the number of rows
+//MPI_Recv (&rows_accepted, 1, MPI_INT, worker, tag, MPI_COMM_WORLD, &status);
 // get data...
-MPI_Probe(worker, tag, MPI_COMM_WORLD, &status);
-int arr_size = rows_accepted*phi_cols;
-int arr_size2;
-MPI_Get_count(&status, MPI_DOUBLE, &arr_size2);
-if (arr_size2!=arr_size) { throw std::runtime_error("arr_size != arr_size2\n");}
-MPI_Recv (&dm.Phi.data()[phi_row], arr_size, MPI_DOUBLE, worker, tag, MPI_COMM_WORLD, &status);
+//MPI_Probe(worker, tag, MPI_COMM_WORLD, &status);
+//int arr_size = rows_accepted*phi_cols;
+//int arr_size2;
+//MPI_Get_count(&status, MPI_DOUBLE, &arr_size2);
+//if (arr_size2!=arr_size) { throw std::runtime_error("arr_size != arr_size2\n");}
+//MPI_Recv (&dm.Phi.data()[phi_row], arr_size, MPI_DOUBLE, worker, tag, MPI_COMM_WORLD, &status);
+MPI_Recv (&dm.Phi.data()[phi_row], rows_accepted, rowvecs, worker, tag, MPI_COMM_WORLD, &status);
+//for (int i=0; i<rows_accepted;++i)
+//  MPI_Recv (&dm.Phi.data()[phi_row+i], 1, rowvec, worker, tag, MPI_COMM_WORLD, &status);
 rows_available -= rows_accepted;
 phi_row += rows_accepted;
 if (rows_available==0 ) { std::cout << "!!! HOST1 LOCAL MATRIX IS FILLED !!!" << std::endl;}
@@ -315,14 +357,19 @@ void TadahCLI::subcommand_train() {
 MPI_Send (&b_rank, 1, MPI_INT, worker, tag, MPI_COMM_WORLD);
 MPI_Send (&rows_accepted, 1, MPI_INT, worker, tag, MPI_COMM_WORLD);
 //MPI_Recv (&rows_accepted, 1, MPI_INT, worker, tag, MPI_COMM_WORLD, &status);
+// this is actually necessary for a worker to know the number of rows
+//MPI_Recv (&rows_accepted, 1, MPI_INT, worker, tag, MPI_COMM_WORLD, &status);
 // get data...
-MPI_Probe(worker, tag, MPI_COMM_WORLD, &status);
-int arr_size = rows_accepted*phi_cols;
-int arr_size2;
-MPI_Get_count(&status, MPI_DOUBLE, &arr_size2);
-if (arr_size2!=arr_size) { throw std::runtime_error("arr_size != arr_size2\n");}
-MPI_Recv (&dm.Phi.data()[phi_row], arr_size, MPI_DOUBLE, worker, tag, MPI_COMM_WORLD, &status);
+//MPI_Probe(worker, tag, MPI_COMM_WORLD, &status);
+//int arr_size = rows_accepted*phi_cols;
+//int arr_size2;
+//MPI_Get_count(&status, MPI_DOUBLE, &arr_size2);
+//if (arr_size2!=arr_size) { throw std::runtime_error("arr_size != arr_size2\n");}
+MPI_Recv (&dm.Phi.data()[phi_row], rows_accepted, rowvecs, worker, tag, MPI_COMM_WORLD, &status);
+//for (int i=0; i<rows_accepted;++i)
+//  MPI_Recv (&dm.Phi.data()[phi_row+i], 1, rowvec, worker, tag, MPI_COMM_WORLD, &status);
+//MPI_Recv (&dm.Phi.data()[phi_row], arr_size, MPI_DOUBLE, worker, tag, MPI_COMM_WORLD, &status);
 rows_available -= rows_accepted;
 phi_row += rows_accepted;
@@ -409,22 +456,25 @@ else { // WORKER
 }
 else if (status.MPI_TAG == DATA_TAG) {
   // other worker is giving me some data
+  // this is actually necessary for a worker to know the number of rows
+  //int rows_accepted;
+  //MPI_Recv (&rows_accepted, 1, MPI_INT, worker, tag, MPI_COMM_WORLD, &status);
   int arr_size;
   MPI_Get_count(&status, MPI_DOUBLE, &arr_size);
+  //MPI_Get_count(&status, MPI_DOUBLE, &rows_accepted);
   int rows_accepted = arr_size/phi_cols;
   if (rows_available<rows_accepted) { throw std::runtime_error("Number of rows available is smaller than number of provided rows");}
-  MPI_Recv (&dm.Phi.data()[phi_row], arr_size, MPI_DOUBLE, worker, tag, MPI_COMM_WORLD, &status);
+  MPI_Recv (&dm.Phi.data()[phi_row], rows_available, rowvecs, worker, tag, MPI_COMM_WORLD, &status);
   rows_available -= rows_accepted;
   phi_row += rows_accepted;
 }
 else if (status.MPI_TAG == WORK_TAG) {
-  // otherwise get work package
   int fn_length; // length of the filename char array
   int first; // index of the first structure to read from the file
   int nstruc; // number of structures to be processed
   MPI_Recv (&fn_length, 1, MPI_INT, worker, tag, MPI_COMM_WORLD, &status);
-  //std::cout << "WORKER: " << b_rank << " WORK TAG" << std::endl;
+  // otherwise get work package
   char *fn = (char *) malloc(fn_length+1);
   MPI_Recv (fn, fn_length, MPI_CHAR, 0, WORK_TAG, MPI_COMM_WORLD, &status);
   MPI_Recv (&first, 1, MPI_INT, 0, WORK_TAG, MPI_COMM_WORLD, &status);
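For orientation, the host side of the WORK_TAG exchange mirrors the receive sequence above: filename length, filename characters, index of the first structure, number of structures, in that order. The helper name and tag value in the following sketch are placeholders for illustration; only the message sequence reflects the code in this diff.

// Illustrative only: host-side counterpart of the WORK_TAG receives shown above.
#include <mpi.h>
#include <string>

void send_work_package(int worker, const std::string& fn, int first, int nstruc) {
  const int WORK_TAG = 1;  // assumed tag value for this sketch
  int fn_length = static_cast<int>(fn.size());
  MPI_Send(&fn_length, 1, MPI_INT, worker, WORK_TAG, MPI_COMM_WORLD);      // filename length
  MPI_Send(fn.c_str(), fn_length, MPI_CHAR, worker, WORK_TAG, MPI_COMM_WORLD); // filename characters
  MPI_Send(&first, 1, MPI_INT, worker, WORK_TAG, MPI_COMM_WORLD);          // first structure index
  MPI_Send(&nstruc, 1, MPI_INT, worker, WORK_TAG, MPI_COMM_WORLD);         // number of structures
}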
@@ -469,31 +519,31 @@ else { // WORKER
     rows_needed--;
   }
 }
-//std::cout << "ROWS AVAILABLE: " << rows_available << std::endl;
 // there are no more available rows
-//std::cout << "FULL WORKER: " << b_rank << " rows_available: " << rows_available << " rows_needed: " << rows_needed << std::endl;
 // send remaining data to available processes
-// there first rows is
 while (rows_needed > 0) {
   // request host
-  //std::cout << " FULL WORKER: " << b_rank << " request rows_needed a: " << rows_needed << std::endl;
   MPI_Send (&rows_needed, 1, MPI_INT, 0, DATA_TAG, MPI_COMM_WORLD);
-  //std::cout << " FULL WORKER: " << b_rank << " request rows_needed b: " << rows_needed << std::endl;
   int rows_accepted; // number of accepted rows
   int dest; // receiving process
   // host returns which dest can accept and how much
   MPI_Recv (&dest, 1, MPI_INT, 0, DATA_TAG, MPI_COMM_WORLD, &status);
-  //std::cout << " FULL WORKER: " << b_rank << " request rows_needed c: " << rows_needed << std::endl;
   MPI_Recv (&rows_accepted, 1, MPI_INT, 0, DATA_TAG, MPI_COMM_WORLD, &status);
-  //std::cout << " FULL WORKER: " << b_rank << " get rows_accepted: " << rows_accepted << std::endl;
   // we send data to the host or a willing worker
-  //MPI_Send (&rows_accepted, 1, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD);
-  //std::cout << " FULL WORKER: " << b_rank << " send rows_accepted: " << rows_accepted << std::endl;
-  //std::cout << " FULL WORKER: " << b_rank << " rows_needed: " << rows_needed << std::endl;
-  int start=(temp_dm.Phi.rows()-rows_needed); // Phi is stored in column major order
-  int arr_size = rows_accepted*phi_cols;
-  MPI_Send (&temp_dm.Phi.data()[start], arr_size, MPI_DOUBLE, dest, DATA_TAG, MPI_COMM_WORLD);
+  int start=temp_dm.Phi.rows()-rows_needed;
+  // Define temp data type for temp Phi matrix
+  // Phi is stored in a column-major order
+  MPI_Datatype trowvec, trowvecs;
+  MPI_Type_vector( temp_dm.Phi.cols(), 1, temp_dm.Phi.rows(), MPI_DOUBLE, &trowvec);
+  MPI_Type_commit(&trowvec);
+  MPI_Type_create_resized(trowvec, 0, 1*sizeof(double), &trowvecs);
+  MPI_Type_commit(&trowvecs);
+  // ready to send
+  MPI_Send (&temp_dm.Phi.data()[start], rows_accepted, trowvecs, dest, DATA_TAG, MPI_COMM_WORLD);
   rows_needed -= rows_accepted;
 }
@@ -525,9 +575,9 @@ int descPHI2[9];
 int descB[9];
 int info;
 int info2;
-descinit_( descPHI, &PHI_rows, &PHI_cols, &rnb, &cnb, &izero, &izero, &context, /*leading dimension*/&phi_rows, &info);
-descinit_( descPHI2, &PHI_rows, &PHI_cols, &rnb, &cnb, &izero, &izero, &context, /*leading dimension*/&phi_rows, &info);
-descinit_( descB , &PHI_rows, &ione, &rnb, &cnb, &izero, &izero, &context, /*leading dimension*/&phi_rows, &info2);
+descinit_( descPHI, &PHI_rows, &PHI_cols, &rnb1, &cnb1, &izero, &izero, &context1, /*leading dimension*/&phi_rows, &info);
+descinit_( descPHI2, &PHI_rows, &PHI_cols, &rnb2, &cnb2, &izero, &izero, &context2, /*leading dimension*/&phi_rows2, &info2);
+descinit_( descB , &PHI_rows, &ione, &rnb1, &cnb1, &izero, &izero, &context1, /*leading dimension*/&phi_rows, &info2);
 if (rank==1) {
   std::cout << "descPHI:"<< std::endl;
@@ -553,23 +603,25 @@ char trans='N';
 int nrhs = 1;
 double *b = new double[phi_rows];
 for (int i=0;i<phi_rows;++i) b[i]=i;
-if (rank==1)
+if (rank==0)
   std::cout << dm.Phi;
 int ia = 1;
 int ja = 1;
 int ib = 1;
 int jb = 1;
-DesignMatrix<DM_Function_Base&> dm2(*fb, config);
-dm2.Phi.resize(phi_rows,phi_cols);
-pdgemr2d_(&PHI_rows, &PHI_cols, dm.Phi.ptr(), &ione, &ione, descPHI,
-          dm2.Phi.ptr(), &ione, &ione, descPHI2, &context);
-if (rank==1) {
-  std::cout << "----------Making copy-----------" << std::endl;
-  std::cout << dm2.Phi;
-}
+//
+// DesignMatrix<DM_Function_Base&> dm2(*fb, config);
+// dm2.Phi.resize(phi_rows2,phi_cols2);
+//
+//pdgemr2d_(&PHI_rows, &PHI_cols, dm.Phi.ptr(), &ione, &ione, descPHI,
+//          dm2.Phi.ptr(), &ione, &ione, descPHI2, &context2);
+//if (rank==0) {
+//  std::cout << "----------Making copy-----------" << std::endl;
+//  std::cout << dm2.Phi;
+//}
 double wkopt;
 int lwork = -1; // query -> get size of the work matrix
@@ -580,16 +632,18 @@ if (info != 0) {
 lwork = (int)wkopt;
-std::cout << "work array size: " << lwork << std::endl;
+//std::cout << "work array size: " << lwork << std::endl;
 double *work = new double[lwork];
 pdgels_(&trans, &phi_rows, &phi_cols, &nrhs, dm.Phi.ptr(), &ia, &ja, descPHI, b, &ib, &jb, descB, work, &lwork, &info);
 double MPIt2 = MPI_Wtime();
-printf("[%dx%d] Done, time %e s.\n", b_row, b_col, MPIt2 - MPIt1);
+//printf("[%dx%d] Done, time %e s.\n", b_row1, b_col1, MPIt2 - MPIt1);
 delete[] b;
 delete[] work;
+MPI_Type_free(&rowvec);
+MPI_Type_free(&rowvecs);
 #endif
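The pdgels_ call above uses the usual LAPACK/ScaLAPACK two-phase workspace query: a first call with lwork = -1 only reports the optimal workspace size, and the actual solve follows once the buffer is allocated. A minimal sketch of that pattern is shown below; it assumes the distributed matrix, right-hand side and descriptors have already been set up on a BLACS grid, and the function name is chosen for illustration.

// Illustrative sketch of the two-phase workspace query used above.
#include <stdexcept>
#include <vector>

extern "C" void pdgels_(char* trans, int* m, int* n, int* nrhs,
                        double* a, int* ia, int* ja, int* desca, double* b, int* ib,
                        int* jb, int* descb, double* work, int* lwork, int* info);

void solve_least_squares(int m, int n, double* A, int* descA, double* b, int* descB) {
  char trans = 'N';
  int nrhs = 1, ia = 1, ja = 1, ib = 1, jb = 1, info = 0;
  // Phase 1: lwork = -1 makes pdgels_ write the optimal work size into wkopt.
  double wkopt;
  int lwork = -1;
  pdgels_(&trans, &m, &n, &nrhs, A, &ia, &ja, descA, b, &ib, &jb, descB, &wkopt, &lwork, &info);
  if (info != 0) throw std::runtime_error("pdgels_ workspace query failed");
  // Phase 2: allocate the reported workspace and perform the actual solve.
  lwork = static_cast<int>(wkopt);
  std::vector<double> work(lwork);
  pdgels_(&trans, &m, &n, &nrhs, A, &ia, &ja, descA, b, &ib, &jb, descB, work.data(), &lwork, &info);
  if (info != 0) throw std::runtime_error("pdgels_ failed");
}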
@@ -628,7 +682,8 @@ if(model)
 if(fb)
   delete fb;
-blacs_gridexit_(&context);
+blacs_gridexit_(&context1);
+blacs_gridexit_(&context2);
 //MPI_Win_free(&window);
 }