From a16612ffbee5c193563ec5f0969ec2c0f00b45d2 Mon Sep 17 00:00:00 2001 From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa> Date: Sat, 1 Feb 2014 18:09:33 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11447 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- tools/colvars/Makefile | 24 +++ tools/colvars/abf_data.cpp | 341 +++++++++++++++++++++++++++++++ tools/colvars/abf_data.h | 94 +++++++++ tools/colvars/abf_integrate.cpp | 343 ++++++++++++++++++++++++++++++++ 4 files changed, 802 insertions(+) create mode 100644 tools/colvars/Makefile create mode 100644 tools/colvars/abf_data.cpp create mode 100644 tools/colvars/abf_data.h create mode 100644 tools/colvars/abf_integrate.cpp diff --git a/tools/colvars/Makefile b/tools/colvars/Makefile new file mode 100644 index 0000000000..a1f1b4c4b0 --- /dev/null +++ b/tools/colvars/Makefile @@ -0,0 +1,24 @@ +SHELL=/bin/sh +######################### +# adjust as needed +CXX=g++ +CXXFLAGS=-Wall -O2 +# set to .exe for windows +EXT= +######################### + +all: abf_integrate$(EXT) + +clean: + -rm *~ *.o abf_integrate$(EXT) *.exe + +abf_integrate$(EXT): abf_integrate.o abf_data.o + $(CXX) -o $@ $(CXXFLAGS) $^ + +%.o: %.cpp + $(CXX) -o $@ -c $(CXXFLAGS) $< + +# dependencies +abf_integrate.o: abf_integrate.cpp abf_data.h +abf_data.o: abf_data.cpp abf_data.h + diff --git a/tools/colvars/abf_data.cpp b/tools/colvars/abf_data.cpp new file mode 100644 index 0000000000..bb08facd1d --- /dev/null +++ b/tools/colvars/abf_data.cpp @@ -0,0 +1,341 @@ + +#include "abf_data.h" +#include <fstream> +#include <string> +#include <cstring> +#include <cstdlib> +#include <ctime> + +/// Construct gradient field object from an ABF-saved file +ABFdata::ABFdata(const char *gradFileName) +{ + + std::ifstream gradFile; + std::ifstream countFile; + int n; + char hash; + double xi; + char *countFileName; + + countFileName = new char[strlen (gradFileName) + 2]; + strcpy (countFileName, gradFileName); + countFileName[strlen (gradFileName) - 4] = '\0'; + strcat (countFileName, "count"); + + std::cout << "Opening file " << gradFileName << " for reading\n"; + gradFile.open(gradFileName); + if (!gradFile.good()) { + std::cerr << "Cannot read from file " << gradFileName << ", aborting\n"; + exit(1); + } + + gradFile >> hash; + if (hash != '#') { + std::cerr << "Missing \'#\' sign in gradient file\n"; + exit(1); + } + gradFile >> Nvars; + + std::cout << "Number of variables: " << Nvars << "\n"; + + sizes = new int[Nvars]; + blocksizes = new int[Nvars]; + PBC = new int[Nvars]; + widths = new double[Nvars]; + mins = new double[Nvars]; + + scalar_dim = 1; // total is (n1 * n2 * ... * n_Nvars ) + + for (int i = 0; i < Nvars; i++) { + gradFile >> hash; + if (hash != '#') { + std::cerr << "Missing \'#\' sign in gradient file\n"; + exit(1); + } + // format is: xiMin dxi Nbins PBCflag + gradFile >> mins[i] >> widths[i] >> sizes[i] >> PBC[i]; + std::cout << "min = " << mins[i] << " width = " << widths[i] + << " n = " << sizes[i] << " PBC: " << (PBC[i]?"yes":"no") << "\n"; + + if (sizes[i] == 0) { + std::cout << "ERROR: size should not be zero!\n"; + exit(1); + } + scalar_dim *= sizes[i]; + } + + // block sizes, smallest for the last dimension + blocksizes[Nvars - 1] = 1; + for (int i = Nvars - 2; i >= 0; i--) { + blocksizes[i] = blocksizes[i + 1] * sizes[i + 1]; + } + + vec_dim = scalar_dim * Nvars; + //std::cout << "Gradient field has length " << vec_dim << "\n"; + + gradients = new double[vec_dim]; + estimate = new double[vec_dim]; + deviation = new double[vec_dim]; + count = new unsigned int[scalar_dim]; + + int *pos = new int[Nvars]; + for (int i = 0; i < Nvars; i++) + pos[i] = 0; + + for (unsigned int i = 0; i < scalar_dim; i++) { + // Here we do the Euclidian division iteratively + for (int k = Nvars - 1; k > 0; k--) { + if (pos[k] == sizes[k]) { + pos[k] = 0; + pos[k - 1]++; + } + } + for (unsigned int j = 0; j < Nvars; j++) { + // Read values of the collective variables only to check for consistency with grid + gradFile >> xi; + + double rel_diff = (mins[j] + widths[j] * (pos[j] + 0.5) - xi) / widths[j]; + if ( rel_diff * rel_diff > 1e-12 ) { + std::cout << "\nERROR: wrong coordinates in gradient file\n"; + std::cout << "Expected " << mins[j] + widths[j] * (pos[j] + 0.5) << ", got " << xi << std::endl; + exit(1); + } + } + for (unsigned int j = 0; j < Nvars; j++) { + // Read and store gradients + if ( ! (gradFile >> gradients[i * Nvars + j]) ) { + std::cout << "\nERROR: could not read gradient data\n"; + exit(1); + } + } + pos[Nvars - 1]++; // move on to next position + } + // check for end of file + if ( gradFile >> xi ) { + std::cout << "\nERROR: extraneous data at end of gradient file\n"; + exit(1); + } + gradFile.close(); + + + std::cout << "Opening file " << countFileName << " for reading\n"; + countFile.open(countFileName); + + if (!countFile.good()) { + std::cerr << "Cannot read from file " << countFileName << ", aborting\n"; + exit(1); + } + + countFile >> hash; + if (hash != '#') { + std::cerr << "Missing \'#\' sign in count file\n"; + exit(1); + } + countFile >> Nvars; + + for (int i = 0; i < Nvars; i++) { + countFile >> hash; + if (hash != '#') { + std::cerr << "Missing \'#\' sign in gradient file\n"; + exit(1); + } + countFile >> mins[i] >> widths[i] >> sizes[i] >> PBC[i]; + } + + for (unsigned int i = 0; i < scalar_dim; i++) { + for (unsigned int j = 0; j < Nvars; j++) { + // Read and ignore values of the collective variables + countFile >> xi; + } + // Read and store counts + countFile >> count[i]; + } + // Could check for end-of-file string here + countFile.close(); + delete [] countFileName; + + // for metadynamics + bias = new double[scalar_dim]; + histogram = new unsigned int[scalar_dim]; + for (unsigned int i = 0; i < scalar_dim; i++) { + histogram[i] = 0; + bias[i] = 0.0; + } +} + +ABFdata::~ABFdata() +{ + delete[] sizes; + delete[] blocksizes; + delete[] PBC; + delete[] widths; + delete[] mins; + delete[] gradients; + delete[] estimate; + delete[] deviation; + delete[] count; + delete[] bias; + delete[] histogram; +} + +unsigned int ABFdata::offset(const int *pos) +{ + unsigned int index = 0; + + for (int i = 0; i < Nvars; i++) { + // Check for out-of bounds indices here + if (pos[i] < 0 || pos[i] >= sizes[i]) { + std::cerr << "Out-of-range index: " << pos[i] << " for rank " << i << "\n"; + exit(1); + } + index += blocksizes[i] * pos[i]; + } + // we leave the multiplication below for the caller to do + // we just give the offset for scalar fields + // index *= Nvars; // Nb of gradient vectors -> nb of array elts + return index; +} + +void ABFdata::write_histogram(const char *fileName) +{ + + std::ofstream os; + unsigned int index; + int *pos, i; + + os.open(fileName); + if (!os.good()) { + std::cerr << "Cannot write to file " << fileName << ", aborting\n"; + exit(1); + } + pos = new int[Nvars]; + for (i = 0; i < Nvars; i++) + pos[i] = 0; + + for (index = 0; index < scalar_dim; index++) { + // Here we do the Euclidian division iteratively + for (i = Nvars - 1; i > 0; i--) { + if (pos[i] == sizes[i]) { + pos[i] = 0; + pos[i - 1]++; + os << "\n"; + } + } + // Now a stupid check: + if (index != offset(pos)) { + std::cerr << "Wrong position vector at index " << index << "\n"; + exit(1); + } + + for (i = 0; i < Nvars; i++) { + os << mins[i] + widths[i] * (pos[i] + 0.5) << " "; + } + os << histogram[index] << "\n"; + pos[Nvars - 1]++; // move on to next position + } + os.close(); + delete[]pos; +} + + +void ABFdata::write_bias(const char *fileName) +{ +// write the opposite of the bias, with global minimum set to 0 + + std::ofstream os; + unsigned int index; + int *pos, i; + double minbias, maxbias; + + os.open(fileName); + if (!os.good()) { + std::cerr << "Cannot write to file " << fileName << ", aborting\n"; + exit(1); + } + pos = new int[Nvars]; + for (i = 0; i < Nvars; i++) + pos[i] = 0; + + // Set the minimum value to 0 by subtracting each value from the max + maxbias = bias[0]; + for (index = 0; index < scalar_dim; index++) { + if (bias[index] > maxbias) + maxbias = bias[index]; + } + + // Set the maximum value to that of the lowest nonzero bias + minbias = bias[0]; + for (index = 0; index < scalar_dim; index++) { + if (minbias == 0.0 || (bias[index] > 0.0 && bias[index] < minbias)) + minbias = bias[index]; + } + + for (index = 0; index < scalar_dim; index++) { + // Here we do the Euclidian division iteratively + for (i = Nvars - 1; i > 0; i--) { + if (pos[i] == sizes[i]) { + pos[i] = 0; + pos[i - 1]++; + os << "\n"; + } + } + // Now a stupid check: + if (index != offset(pos)) { + std::cerr << "Wrong position vector at index " << index << "\n"; + exit(1); + } + + for (i = 0; i < Nvars; i++) { + os << mins[i] + widths[i] * (pos[i] + 0.5) << " "; + } + os << maxbias - (bias[index] > 0.0 ? bias[index] : minbias) << "\n"; + pos[Nvars - 1]++; // move on to next position + } + os.close(); + delete[]pos; +} + + +void ABFdata::write_field(double *field, const char *fileName) +{ + std::ofstream os; + unsigned int index; + int *pos, i; + double *f; + + os.open(fileName); + if (!os.good()) { + std::cerr << "Cannot write to file " << fileName << ", aborting\n"; + exit(1); + } + pos = new int[Nvars]; + for (i = 0; i < Nvars; i++) + pos[i] = 0; + + // start at beginning of array + f = field; + + for (index = 0; index < scalar_dim; index++) { + // Here we do the Euclidian division iteratively + for (i = Nvars - 1; i > 0; i--) { + if (pos[i] == sizes[i]) { + pos[i] = 0; + pos[i - 1]++; + os << "\n"; + } + } + + for (i = 0; i < Nvars; i++) { + os << mins[i] + widths[i] * (pos[i] + 0.5) << " "; + } + for (i = 0; i < Nvars; i++) { + os << f[i] << " ";; + } + os << "\n"; + + pos[Nvars - 1]++; // move on to next position... + f += Nvars; // ...also in the array + } + os.close(); + delete[]pos; +} diff --git a/tools/colvars/abf_data.h b/tools/colvars/abf_data.h new file mode 100644 index 0000000000..97188f764f --- /dev/null +++ b/tools/colvars/abf_data.h @@ -0,0 +1,94 @@ +/// \file integrate.h General headers for ABF_integrate + +#include <iostream> +#include <vector> + +#define MIN_SAMPLES 1 + +/// Free energy gradients class +class ABFdata { + + protected: + /// Sizes of (i-1) dimension blocks + /// computed as Prod_(j<i) sizes[j] + int *blocksizes; + /// Minimum values of each variable + double *mins; + + public: + int Nvars; + /// Free energy gradients (vector field) + double *gradients; + /// Sampling from the ABF calculation + unsigned int *count; + /// Bin widths + double *widths; + + unsigned int scalar_dim; + unsigned int vec_dim; + unsigned int *histogram; + + /// History-dependent bias + double *bias; + + /// Estimate of the FE gradient computed + /// from MtD bias or histogram in standard MC + double *estimate; + + /// Deviation between starting free energy gradient and + /// estimated one + double *deviation; + + void write_histogram(const char *fileName); + void write_bias(const char *fileName); + void write_field(double *field, const char *fileName); + + /// Grid sizes + int *sizes; + + /// Flag stating if each variable is periodic + int *PBC; + + /// Constructor: reads from a file + ABFdata(const char *gradFileName); + ~ABFdata(); + + /// \brief Returns an offset for scalar fields based on a n-index. + /// multiply by Nvars to get an offset in a Nvars-vector field + unsigned int offset(const int *); + + inline bool wrap(int &pos, int i); + + /// Decides if an offset is outside the allowed region based on the ABF sampling + inline bool allowed(unsigned int offset); +}; + + +inline bool ABFdata::wrap(int &pos, int i) +{ + if (PBC[i]) { + if (pos == -1) { + pos = sizes[i] - 1; + return true; + } + if (pos == sizes[i]) { + pos = 0; + return true; + } + } else { + // No PBC + if (pos == -1) { + pos = 0; + return false; + } + if (pos == sizes[i]) { + pos = sizes[i] - 1; + return false; + } + } + return true; +} + +inline bool ABFdata::allowed(unsigned int offset) { + return count[offset] > MIN_SAMPLES; +} diff --git a/tools/colvars/abf_integrate.cpp b/tools/colvars/abf_integrate.cpp new file mode 100644 index 0000000000..69ebb22802 --- /dev/null +++ b/tools/colvars/abf_integrate.cpp @@ -0,0 +1,343 @@ +/**************************************************************** + * abf_integrate * + * Integrate n-dimensional PMF from discrete gradient grid * + * Jerome Henin <jerome.henin@ibpc.fr> * + ****************************************************************/ + +#include "abf_data.h" +#include <fstream> +#include <string> +#include <cstring> +#include <cstdlib> +#include <ctime> +#include <cmath> + +char *parse_cl(int argc, char *argv[], unsigned int *nsteps, double *temp, + bool * meta, double *hill, double *hill_fact); +double compute_deviation(ABFdata * data, bool meta, double kT); + +int main(int argc, char *argv[]) +{ + char *data_file; + char *out_file; + unsigned int step, nsteps, total, out_freq; + int *pos, *dpos, *newpos; + unsigned int *histogram; + const double *grad, *newgrad; + unsigned int offset, newoffset; + int not_accepted; + double dA; + double temp; + double mbeta; + bool meta; + double hill, hill_fact, hill_min; + double rmsd, rmsd_old, rmsd_rel_change, convergence_limit; + bool converged; + unsigned int scale_hill_step; + + // Setting default values + nsteps = 0; + temp = 500; + meta = true; + hill = 0.01; + hill_fact = 0.5; + hill_min = 0.0005; + + convergence_limit = -0.001; + + if (!(data_file = parse_cl(argc, argv, &nsteps, &temp, &meta, &hill, &hill_fact))) { + std::cerr << "\nabf_integrate: MC-based integration of multidimensional free energy gradient\n"; + std::cerr << "Version 20110511\n\n"; + std::cerr << "Syntax: " << argv[0] << + " <filename> [-n <nsteps>] [-t <temp>] [-m [0|1] (metadynamics)]" + " [-h <hill_height>] [-f <variable_hill_factor>]\n\n"; + exit(1); + } + + if (meta) { + std::cout << "\nUsing metadynamics-style sampling with hill height: " << hill << "\n"; + if (hill_fact) { + std::cout << "Varying hill height by factor " << hill_fact << "\n"; + } + } else { + std::cout << "\nUsing unbiased MC sampling\n"; + } + + if (nsteps) { + std::cout << "Sampling " << nsteps << " steps at temperature " << temp << "\n\n"; + out_freq = nsteps / 10; + scale_hill_step = nsteps / 2; + converged = true; + } else { + std::cout << "Sampling until convergence at temperature " << temp << "\n\n"; + out_freq = 1000000; + converged = false; + } + + // Inverse temperature in (kcal/mol)-1 + mbeta = -1 / (0.001987 * temp); + + ABFdata data(data_file); + + if (!nsteps) { + scale_hill_step = 2000 * data.scalar_dim; + nsteps = 2 * scale_hill_step; + std::cout << "Setting minimum number of steps to " << nsteps << "\n"; + } + + srand(time(NULL)); + + pos = new int[data.Nvars]; + dpos = new int[data.Nvars]; + newpos = new int[data.Nvars]; + + do { + for (int i = 0; i < data.Nvars; i++) { + pos[i] = rand() % data.sizes[i]; + } + offset = data.offset(pos); + } while ( !data.allowed (offset) ); + + rmsd = compute_deviation(&data, meta, 0.001987 * temp); + std::cout << "\nInitial gradient RMS is " << rmsd << "\n"; + + total = 0; + for (step = 1; (step <= nsteps || !converged); step++) { + + if ( step % out_freq == 0) { + rmsd_old = rmsd; + rmsd = compute_deviation(&data, meta, 0.001987 * temp); + rmsd_rel_change = (rmsd - rmsd_old) / (rmsd_old * double (out_freq)) * 1000000.0; + std::cout << "Step " << step << " ; gradient RMSD is " << rmsd + << " ; relative change per 1M steps " << rmsd_rel_change; + if ( rmsd_rel_change > convergence_limit && step >= nsteps ) { + converged = true; + } + + if (meta && hill_fact && step > scale_hill_step && hill > hill_min ) { + hill *= hill_fact; + std::cout << " - changing hill height to " << hill << "\n"; + } else { + std::cout << "\n"; + } + } + + offset = data.offset(pos); + data.histogram[offset]++; + if (meta) { + data.bias[offset] += hill; + } + + grad = data.gradients + offset * data.Nvars; + + not_accepted = 1; + while (not_accepted) { + dA = 0.0; + total++; + for (int i = 0; i < data.Nvars; i++) { + dpos[i] = rand() % 3 - 1; + newpos[i] = pos[i] + dpos[i]; + data.wrap(newpos[i], i); + if (newpos[i] == pos[i]) + dpos[i] = 0; + + if (dpos[i]) { + dA += grad[i] * dpos[i] * data.widths[i]; + // usefulness of the interpolation below depends on + // where the grid points are for the histogram wrt to the gradients + // If done, it has to be done in all directions + // the line below is useless + //dA += 0.5 * (newgrad[i] + grad[i]) * dpos[i] * data.widths[i]; + } + } + + newoffset = data.offset(newpos); + if (meta) { + dA += data.bias[newoffset] - data.bias[offset]; + } + + if ( data.allowed (newoffset) && (((float) rand()) / RAND_MAX < exp(mbeta * dA)) ) { + // Accept move + for (int i = 0; i < data.Nvars; i++) { + pos[i] = newpos[i]; + not_accepted = 0; + } + } + } + } + std::cout << "Run " << total << " total iterations; acceptance ratio is " + << double (step) / double (total) + << " ; final gradient RMSD is " << compute_deviation(&data, meta, 0.001987 * temp) << "\n"; + + out_file = new char[strlen(data_file) + 8]; + + if (meta) { + sprintf(out_file, "%s.pmf", data_file); + std::cout << "Writing PMF to file " << out_file << "\n"; + data.write_bias(out_file); + } + + // TODO write a PMF for unbiased MC, too... + sprintf(out_file, "%s.histo", data_file); + std::cout << "Writing sampling histogram to file " << out_file << "\n"; + data.write_histogram(out_file); + + sprintf(out_file, "%s.est", data_file); + std::cout << "Writing estimated FE gradient to file " << out_file << "\n"; + data.write_field(data.estimate, out_file); + + sprintf(out_file, "%s.dev", data_file); + std::cout << "Writing FE gradient deviation to file " << out_file << "\n\n"; + data.write_field(data.deviation, out_file); + + delete [] pos; + delete [] dpos; + delete [] newpos; + delete [] out_file; + exit(0); +} + + +double compute_deviation(ABFdata * data, bool meta, double kT) +{ + // Computing deviation between gradients differentiated from pmf + // and input data + // NOTE: this is mostly for output, hence NOT performance-critical + double *dev = data->deviation; + double *est = data->estimate; + const double *grad = data->gradients; + int *pos, *dpos, *newpos; + double rmsd = 0.0; + unsigned int offset, newoffset; + double sum; + int c; + bool moved; + unsigned int norm = 0; // number of data points summmed + + pos = new int[data->Nvars]; + dpos = new int[data->Nvars]; + newpos = new int[data->Nvars]; + + for (int i = 0; i < data->Nvars; i++) + pos[i] = 0; + + for (offset = 0; offset < data->scalar_dim; offset++) { + for (int i = data->Nvars - 1; i > 0; i--) { + if (pos[i] == data->sizes[i]) { + pos[i] = 0; + pos[i - 1]++; + } + } + + if (data->allowed (offset)) { + for (int i = 0; i < data->Nvars; i++) + newpos[i] = pos[i]; + + for (int i = 0; i < data->Nvars; i++) { + est[i] = 0.0; + sum = 0.0; // sum of finite differences on two sides (if not on edge of the grid) + c = 0; // count of summed values + + newpos[i] = pos[i] - 1; + moved = data->wrap(newpos[i], i); + newoffset = data->offset(newpos); + if ( moved && data->allowed (newoffset) ) { + if (meta) { + sum = (data->bias[newoffset] - data->bias[offset]) / data->widths[i]; + c++; + } else { + if (data->histogram[offset] && data->histogram[newoffset]) { + sum = kT * log(double (data->histogram[newoffset]) / + double (data->histogram[offset])) / data->widths[i]; + c++; + } + } + } + + newpos[i] = pos[i] + 1; + moved = data->wrap(newpos[i], i); + newoffset = data->offset(newpos); + if ( moved && data->allowed (newoffset) ) { + if (meta) { + sum += (data->bias[offset] - data->bias[newoffset]) / data->widths[i]; + c++; + } else { + if (data->histogram[offset] && data->histogram[newoffset]) { + sum += kT * log(double (data->histogram[offset]) / + double (data->histogram[newoffset])) / data->widths[i]; + c++; + } + } + } + + newpos[i] = pos[i]; // Go back to initial position for next dimension + + est[i] = (c ? sum/double(c) : 0.0); + dev[i] = grad[i] - est[i]; + rmsd += dev[i] * dev[i]; + norm++; + } + } + + pos[data->Nvars - 1]++; // move on to next point + est += data->Nvars; + dev += data->Nvars; + grad += data->Nvars; + } + + delete [] pos; + delete [] newpos; + delete [] dpos; + + return sqrt(rmsd / norm); +} + + +char *parse_cl(int argc, char *argv[], unsigned int *nsteps, double *temp, + bool * meta, double *hill, double *hill_fact) +{ + char *filename = NULL; + float f_temp, f_hill; + int meta_int; + + // getting default value for the integer + meta_int = (*meta ? 1 : 0); + + // "Syntax: " << argv[0] << " <filename> [-n <nsteps>] [-t <temp>] [-m [0|1] (metadynamics)] [-h <hill_height>]\n"; + if (argc < 2) { + return NULL; + } + + for (int i = 2; i + 1 < argc; i += 2) { + if (argv[i][0] != '-') { + return NULL; + } + switch (argv[i][1]) { + case 'n': + if (sscanf(argv[i + 1], "%u", nsteps) != 1) + return NULL; + break; + case 't': + if (sscanf(argv[i + 1], "%lf", temp) != 1) + return NULL; + break; + case 'm': + if (sscanf(argv[i + 1], "%u", &meta_int) != 1) + return NULL; + break; + case 'h': + if (sscanf(argv[i + 1], "%lf", hill) != 1) + return NULL; + break; + case 'f': + if (sscanf(argv[i + 1], "%lf", hill_fact) != 1) + return NULL; + break; + default: + return NULL; + } + } + + *meta = (meta_int != 0); + return argv[1]; +} -- GitLab