From 24314b530f55f65845f23d2ee4844d10d652078e Mon Sep 17 00:00:00 2001 From: mkirsz <s1351949@sms.ed.ac.uk> Date: Sun, 22 Dec 2024 17:03:14 +0000 Subject: [PATCH] adjusted examples to reflect recent changes --- examples/API/ex_1/config | 3 +- examples/API/ex_1/ex1.cpp | 264 ++++++++++++++--------------- examples/API/ex_2/config_train | 3 +- examples/API/ex_2/pot.tadah | 2 +- examples/API/ex_hpo_1/config.train | 2 +- examples/API/ex_hpo_1/targets | 8 +- examples/CLI/ex_1/config.train | 2 +- 7 files changed, 141 insertions(+), 143 deletions(-) diff --git a/examples/API/ex_1/config b/examples/API/ex_1/config index 3de1553..2ffa081 100644 --- a/examples/API/ex_1/config +++ b/examples/API/ex_1/config @@ -7,8 +7,7 @@ ATOMS Ta WATOMS 73 INIT2B true -INIT3B false -INITMB false +TYPE2B D2_Blip 4 4 Ta Ta RCUT2B 5.3 diff --git a/examples/API/ex_1/ex1.cpp b/examples/API/ex_1/ex1.cpp index f6cd54c..bd890af 100644 --- a/examples/API/ex_1/ex1.cpp +++ b/examples/API/ex_1/ex1.cpp @@ -43,136 +43,136 @@ * https://journals.aps.org/prb/abstract/10.1103/PhysRevB.88.134101 */ int main() { - - std::cout << "TRAINING STAGE" << std::endl; - // Config file configures almost all model parameters. - // See below for a more detailed explanation of used key-value(s) pairs. - Config config("config"); - - // First we load all training data from a list - // of training datasets into StrutureDB object. - // Paths to datasets are specified with a key DBFILE in a config file. - std::cout << "StructureDB loading data..." << std::flush; - StructureDB stdb(config); - std::cout << "Done!" << std::endl; - - // Next we pass StructureDB object to the nearest neighbour calculator. - // NNFinder will create full nearest neighbours lists for every atom - // in every structure. These lists will be stored by individual Structures - // in a StructureDB object. - // The lists are calculated up to the max cutoff from the config file: - // cutoff_max = max(RCUT2B, RCUT3B, RCUTMB). - std::cout << "Calculating nearest neighbours..." << std::flush; - NNFinder nnf(config); - nnf.calc(stdb); - std::cout << "Done!" << std::endl; - - // STEP 1a: Select descriptors. - // All three types must be specified. - // Use Dummy if given type is not required. - - // D2 - TWO-BODY - //using D2=D2_LJ; - //using D2=D2_BP; - using D2=D2_Blip; - //using D2=D2_Dummy; - //using D2=D2_EAM; - - // D3 - THREE-BODY - using D3=D3_Dummy; - - // DM - MANY-BODY - //using DM=DM_EAM; - //using DM=DM_EAD; - using DM=DM_Dummy; - - // STEP 1b: Select cutoffs, C2 for D2, etc - using C2=Cut_Cos; - using C3=Cut_Dummy; - using CM=Cut_Dummy; - - // STEP 1c: Prepare descriptor calculator - DescriptorsCalc<D2,D3,DM,C2,C3,CM> dc(config); - - // STEP 2a: Select Basis Function (BF) or Kernels (K). - // BF is used for M_BLR - Bayesian Linear Regression - // K is used with M_KRR - Kernel Ridge Regression - // See documentation for more BF and K - using BF=DM_BF_Linear; - //using BF=BF_Polynomial2; - //using K=Kern_Linear; - //using K=Kern_Quadratic; - - // STEP 2b: Select Model - using M=M_BLR<BF>; - //using M=M_KRR<K>; - - //// STEP 2c: Instantiate a model - M model(config); - - //std::cout << "TRAINING STAGE..." << std::flush; - - // STEP 3: Training - Option 1. - // Train with StructureDB only. We have to provide calculators here. - // Descriptors are calculated in batches to construct a design matrix - // and then are discarded. - // This is usually the best choice unless you need descriptors for something else - // after the training is done. 
- model.train(stdb,dc); - - // STEP 3: Training - Option 2. - // Train with StructureDB and precalcualted StDescriptorsDB. - //StDescriptorsDB st_desc_db = dc.calc(stdb); - //model.train(st_desc_db,stdb); - std::cout << "Done!" << std::endl; - - // STEP 4: Save model to a text file. - // Once model is trained we can dump it to a file. - // Saved models can be used with LAMMPS or can be reloaded - // to make predictions. - std::cout << "Saving LAMMPS pot.tadah file..." << std::flush; - Config param_file = model.get_param_file(); - std::ofstream outfile("pot.tadah"); - outfile << param_file << std::endl; - outfile.close(); - std::cout << "Done!" << std::endl; - - std::cout << "PREDICTION STAGE..." << std::endl; - // STEP 1: We will reuse LAMMPS param file and add to it - // DBFILE(s) from config_pred file. - // In other words training datasets go to the config file - // and validation datasets are in the config_pred - param_file.add("config_pred"); - - // STEP 2: Load DBFILE from config_pred - std::cout << "StructureDB loading data..." << std::flush; - StructureDB stdb2(param_file); - std::cout << "Done!" << std::endl; - - // STEP 3: Calculate nearest neighbours - std::cout << "Calculating nearest neighbours..." << std::flush; - NNFinder nnf2(param_file); - nnf2.calc(stdb2); - std::cout << "Done!" << std::endl; - - // STEP 4: Prepare DescriptorCalc - DescriptorsCalc<D2,D3,DM,C2,C3,CM> dc2(param_file); - - // STEP 5: Results are saved to new StructureDB object - // - it will only contain predicted values - // so there are no atom positions, etc... - - bool err_bool=false; // predict error, requires LAMBDA -1 - t_type predicted_error; // container for prediction error - std::cout << "Predicting..." << std::flush; - StructureDB stpred = model.predict(param_file,stdb2,dc2); - //StructureDB stpred = model.predict(param_file,stdb2,dc2,predicted_error); - std::cout << "Done!" << std::endl; - - std::cout << "Dumping results to disk..." << std::flush; - Output output(param_file,err_bool); - output.print_predict_all(stdb,stpred,predicted_error); - std::cout << "Done!" << std::endl; - - return 0; + + std::cout << "TRAINING STAGE" << std::endl; + // Config file configures almost all model parameters. + // See below for a more detailed explanation of used key-value(s) pairs. + Config config("config"); + + // First we load all training data from a list + // of training datasets into StrutureDB object. + // Paths to datasets are specified with a key DBFILE in a config file. + std::cout << "StructureDB loading data..." << std::flush; + StructureDB stdb(config); + std::cout << "Done!" << std::endl; + + // Next we pass StructureDB object to the nearest neighbour calculator. + // NNFinder will create full nearest neighbours lists for every atom + // in every structure. These lists will be stored by individual Structures + // in a StructureDB object. + // The lists are calculated up to the max cutoff from the config file: + // cutoff_max = max(RCUT2B, RCUT3B, RCUTMB). + std::cout << "Calculating nearest neighbours..." << std::flush; + NNFinder nnf(config); + nnf.calc(stdb); + std::cout << "Done!" << std::endl; + + // STEP 1a: Select descriptors. + // All three types must be specified. + // Use Dummy if given type is not required. 
+ + // D2 - TWO-BODY + //using D2=D2_LJ; + //using D2=D2_BP; + using D2=D2_Blip; + //using D2=D2_Dummy; + //using D2=D2_EAM; + + // D3 - THREE-BODY + using D3=D3_Dummy; + + // DM - MANY-BODY + //using DM=DM_EAM; + //using DM=DM_EAD; + using DM=DM_Dummy; + + // STEP 1b: Select cutoffs, C2 for D2, etc + using C2=Cut_Cos; + using C3=Cut_Dummy; + using CM=Cut_Dummy; + + // STEP 1c: Prepare descriptor calculator + DescriptorsCalc<D2,D3,DM,C2,C3,CM> dc(config); + + // STEP 2a: Select Basis Function (BF) or Kernels (K). + // BF is used for M_BLR - Bayesian Linear Regression + // K is used with M_KRR - Kernel Ridge Regression + // See documentation for more BF and K + using BF=DM_BF_Linear; + //using BF=BF_Polynomial2; + //using K=Kern_Linear; + //using K=Kern_Quadratic; + + // STEP 2b: Select Model + using M=M_BLR<BF>; + //using M=M_KRR<K>; + + //// STEP 2c: Instantiate a model + M model(config); + + //std::cout << "TRAINING STAGE..." << std::flush; + + // STEP 3: Training - Option 1. + // Train with StructureDB only. We have to provide calculators here. + // Descriptors are calculated in batches to construct a design matrix + // and then are discarded. + // This is usually the best choice unless you need descriptors for something else + // after the training is done. + model.train(stdb,dc); + + // STEP 3: Training - Option 2. + // Train with StructureDB and precalcualted StDescriptorsDB. + //StDescriptorsDB st_desc_db = dc.calc(stdb); + //model.train(st_desc_db,stdb); + std::cout << "Done!" << std::endl; + + // STEP 4: Save model to a text file. + // Once model is trained we can dump it to a file. + // Saved models can be used with LAMMPS or can be reloaded + // to make predictions. + std::cout << "Saving LAMMPS pot.tadah file..." << std::flush; + Config param_file = model.get_param_file(); + std::ofstream outfile("pot.tadah"); + outfile << param_file << std::endl; + outfile.close(); + std::cout << "Done!" << std::endl; + + std::cout << "PREDICTION STAGE..." << std::endl; + // STEP 1: We will reuse LAMMPS param file and add to it + // DBFILE(s) from config_pred file. + // In other words training datasets go to the config file + // and validation datasets are in the config_pred + param_file.add("config_pred"); + + // STEP 2: Load DBFILE from config_pred + std::cout << "StructureDB loading data..." << std::flush; + StructureDB stdb2(param_file); + std::cout << "Done!" << std::endl; + + // STEP 3: Calculate nearest neighbours + std::cout << "Calculating nearest neighbours..." << std::flush; + NNFinder nnf2(param_file); + nnf2.calc(stdb2); + std::cout << "Done!" << std::endl; + + // STEP 4: Prepare DescriptorCalc + DescriptorsCalc<D2,D3,DM,C2,C3,CM> dc2(param_file); + + // STEP 5: Results are saved to new StructureDB object + // - it will only contain predicted values + // so there are no atom positions, etc... + + bool err_bool=false; // predict error, requires LAMBDA -1 + t_type predicted_error; // container for prediction error + std::cout << "Predicting..." << std::flush; + StructureDB stpred = model.predict(param_file,stdb2,dc2); + //StructureDB stpred = model.predict(param_file,stdb2,dc2,predicted_error); + std::cout << "Done!" << std::endl; + + std::cout << "Dumping results to disk..." << std::flush; + Output output(param_file,err_bool); + output.print_predict_all(stdb,stpred,predicted_error); + std::cout << "Done!" 
<< std::endl; + + return 0; } diff --git a/examples/API/ex_2/config_train b/examples/API/ex_2/config_train index 3de1553..2ffa081 100644 --- a/examples/API/ex_2/config_train +++ b/examples/API/ex_2/config_train @@ -7,8 +7,7 @@ ATOMS Ta WATOMS 73 INIT2B true -INIT3B false -INITMB false +TYPE2B D2_Blip 4 4 Ta Ta RCUT2B 5.3 diff --git a/examples/API/ex_2/pot.tadah b/examples/API/ex_2/pot.tadah index 7a1ac42..8e14f92 100644 --- a/examples/API/ex_2/pot.tadah +++ b/examples/API/ex_2/pot.tadah @@ -20,7 +20,7 @@ RCTYPE2B Cut_Cos RCUT2B 5.3 SGRID2B -2 4 0.1 1.0 SWEIGHT 1 -TYPE2B D2_Blip +TYPE2B D2_Blip 4 4 Ta Ta VERBOSE 0 WATOMS 73 WEIGHTS -4.4157415513006 0.42189535112317 -0.37507712884283 -0.063288121709893 -0.052300112640426 diff --git a/examples/API/ex_hpo_1/config.train b/examples/API/ex_hpo_1/config.train index 440e8e0..5166847 100644 --- a/examples/API/ex_hpo_1/config.train +++ b/examples/API/ex_hpo_1/config.train @@ -3,7 +3,7 @@ WATOMS 22 DBFILE tdata.db INIT2B true RCUT2B 5.3 -TYPE2B D2_Blip +TYPE2B D2_Blip 5 5 Ti Ti CGRID2B -1 5 1.0 5.0 SGRID2B -2 5 0.02 1.0 RCTYPE2B Cut_Cos diff --git a/examples/API/ex_hpo_1/targets b/examples/API/ex_hpo_1/targets index 4378dfc..a19cc92 100644 --- a/examples/API/ex_hpo_1/targets +++ b/examples/API/ex_hpo_1/targets @@ -7,10 +7,10 @@ POTEVERY 100 potdir # Controls how often the potential file is written # Model Parameters to be Optimized # The initial model is specified in a separate config file. -OPTIM RCUT2B 5.0 6.0 # Optimize two-body cutoff within these bounds. -OPTIM CGRID2B 1.0 6.0 # Optimize CGRID2B within these bounds. -OPTIM SGRID2B 0.01 0.1 # Optimize SGRID2B within these bounds. -OPTIM WATOMS 0.1 20 # Optimize atomic weights within these bounds. +OPTIM RCUT2B (1) 5.0 6.0 # Optimize two-body cutoff within these bounds. +OPTIM CGRID2B (1-5) 1.0 6.0 # Optimize CGRID2B within these bounds. +OPTIM SGRID2B (1,2,3,4,5) 0.01 0.1 # Optimize SGRID2B within these bounds. +OPTIM WATOMS (1) 0.1 20 # Optimize atomic weights within these bounds. # Basic Optimization Targets with Weighted Inclusion in Global Loss ERMSE 0 100 # Energy RMSE with weight 100, printed to properties.dat. diff --git a/examples/CLI/ex_1/config.train b/examples/CLI/ex_1/config.train index 2ca1796..87603a6 100644 --- a/examples/CLI/ex_1/config.train +++ b/examples/CLI/ex_1/config.train @@ -6,7 +6,7 @@ DBFILE tdata.db # Training dataset INIT2B true # Use two-body descriptor MODEL M_BLR BF_Linear # Use linear model -TYPE2B D2_Blip # Use D2_Blip descriptor +TYPE2B D2_Blip 4 4 Ta Ta # Use D2_Blip descriptor RCTYPE2B Cut_Cos # Cutoff function for two-body descriptor RCUT2B 5.3 # Cutoff distance -- GitLab
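
Note on reloading a saved potential: the comments in ex1.cpp state that a saved pot.tadah can be reloaded to make predictions, but the example only shows prediction immediately after training. The sketch below outlines how a standalone prediction run might look, using only the types and calls that appear in ex1.cpp. It is a sketch, not a verified example: the Tadah! headers are omitted because the excerpt above does not show them, and it is an assumption that constructing the model from the saved Config restores the trained WEIGHTS stored in pot.tadah.

// Prediction-only sketch: reload pot.tadah and evaluate a validation set.
int main() {
  Config pot("pot.tadah");      // pot.tadah was written by the training run in ex1.cpp
  pot.add("config_pred");       // append validation DBFILE(s), as in the prediction stage

  // Load validation structures and build nearest-neighbour lists
  StructureDB stdb(pot);
  NNFinder nnf(pot);
  nnf.calc(stdb);

  // Descriptor and cutoff types must match those used for training
  using D2 = D2_Blip;
  using D3 = D3_Dummy;
  using DM = DM_Dummy;
  using C2 = Cut_Cos;
  using C3 = Cut_Dummy;
  using CM = Cut_Dummy;
  DescriptorsCalc<D2, D3, DM, C2, C3, CM> dc(pot);

  using BF = DM_BF_Linear;
  using M  = M_BLR<BF>;
  M model(pot);                 // assumption: weights are read back from the saved Config

  t_type predicted_error;       // only filled when error prediction is enabled (LAMBDA -1)
  StructureDB stpred = model.predict(pot, stdb, dc);

  Output output(pot, false);    // false: do not print predicted errors
  output.print_predict_all(stdb, stpred, predicted_error);
  return 0;
}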