diff --git a/include/tadah/mlip/analytics/analytics.h b/include/tadah/mlip/analytics/analytics.h index 7dd101694959a1e0c4036d525bf8c8820dfc4ef9..71cda1da7ecc506fa2e8869e8cfe41ce2c34a1ff 100644 --- a/include/tadah/mlip/analytics/analytics.h +++ b/include/tadah/mlip/analytics/analytics.h @@ -4,6 +4,8 @@ #include <tadah/mlip/structure_db.h> #include <tadah/core/core_types.h> +namespace tadah { +namespace mlip { /** Class for analysing and comparing datasets * @@ -53,4 +55,6 @@ class Analytics { t_type calc_s_r_sq() const; }; +} +} #endif diff --git a/include/tadah/mlip/atom.h b/include/tadah/mlip/atom.h index 04d82a4543da3ebb68f33dd3373f279abed62d7c..43796cf07db76a190316f07f8beffd1436756089 100644 --- a/include/tadah/mlip/atom.h +++ b/include/tadah/mlip/atom.h @@ -1,89 +1,97 @@ -#ifndef ATOM_H -#define ATOM_H +#pragma once +#ifndef TADAH_MLIP_ATOM_H +#define TADAH_MLIP_ATOM_H -#include <tadah/core/element.h> -#include <tadah/core/core_types.h> - -#include <string> +#include <cstddef> #include <iostream> +// Forward-declare +namespace tadah { +namespace mlip { +class Structure; // Defined in "structure.h" +} // namespace mlip +} // namespace tadah + +namespace tadah { +namespace mlip { + /** - * Container to represent atom properties - * - * Usage example: - * - * # create an empty atom object - all attributes are left uninitialised - * Atom atom; - * - * # set atom symbol - * atom.symbol="Ti" - * - * # set atom name, atomic number... see Element class for attributes - * - * # set atom position to (1.1, 2.2, 3.3) - * atom.position(0) = 1.1; - * atom.position(1) = 2.2; - * atom.position(2) = 3.3; + * @brief Acts as a handle referencing subrange data in the parent Structure's SoA. * - * # set force - * atom.force(0)= 0.1; - * atom.force(1)= -0.2; - * atom.force(2)= 0.3; - * - * Usage example: - * - * # Use constructor to fully initialise this object - * # with the position and force as in the example above - * Element element = PeriodicTable().find_by_symbol("Ti"); - * Atom atom(element, 1.1, 2.2, 3.3, 0.1, -0.2, 0.3); - * - * Usage example: - * - * # Print atom object using streams: - * std::cout << atom; - * - * # Print position only - * std::cout << atom.position - * - * @see Structure + * Inline bodies for each method appear after including "structure_inl.h", providing + * the compiler with the full definition of Structure. */ -struct Atom: public Element { - /** - * Create an empty atom object. All class attributes are left uninitialised. - */ - Atom(); - - /** This constructor fully initialise this object - * - * @param[in] element Chemical Element - * @param[in] px,py,pz Atomic coordinates - * @param[in] fx,fy,fz Force acting on the atom - * - */ - Atom(const Element &element, - const double px, const double py, const double pz, - const double fx, const double fy, const double fz); - - /** Hold position of the atom. */ - Vec3d position; - - /** Hold force on the atom. */ - Vec3d force; - - /** Print object summary to the stream */ - friend std::ostream& operator<<(std::ostream& os, const Atom& atom); - - /** Return true if both atoms are the same. - * - * This operator compares chemical type, position and force. - */ - bool operator==(const Atom &) const; - - /** Return true if both atoms have the same position and type. - * - * This method compares chemical type and position only - */ - bool is_the_same(const Atom &, double thr=1e-6) const; +class Atom { +public: + inline Atom(Structure* parent, std::size_t i) + : parent_(parent), index_(i) + {} + + // x(), y(), z(), fx(), fy(), fz(), atomicNumber() + // Definitions appear after including "structure_inl.h" + inline double& x(); + inline double& y(); + inline double& z(); + inline const double& x() const; + inline const double& y() const; + inline const double& z() const; + + inline double& fx(); + inline double& fy(); + inline double& fz(); + inline const double& fx() const; + inline const double& fy() const; + inline const double& fz() const; + + inline int& atomicNumber(); + inline const int& atomicNumber() const; + + inline friend std::ostream& operator<<(std::ostream& os, const Atom& a); + +private: + Structure* parent_ = nullptr; + std::size_t index_ = 0; }; -#endif + +} // namespace mlip +} // namespace tadah + +// At the bottom, include structure_inl.h to define Atom's methods inline. +#include "structure_inl.h" + +namespace tadah { +namespace mlip { + +// Below are the inline definitions of each Atom method, referencing SoA data in the parent Structure. + +inline double& Atom::x() { return parent_->x(index_); } +inline double& Atom::y() { return parent_->y(index_); } +inline double& Atom::z() { return parent_->z(index_); } +inline const double& Atom::x() const { return parent_->x(index_); } +inline const double& Atom::y() const { return parent_->y(index_); } +inline const double& Atom::z() const { return parent_->z(index_); } + +inline double& Atom::fx() { return parent_->fx(index_); } +inline double& Atom::fy() { return parent_->fy(index_); } +inline double& Atom::fz() { return parent_->fz(index_); } +inline const double& Atom::fx() const { return parent_->fx(index_); } +inline const double& Atom::fy() const { return parent_->fy(index_); } +inline const double& Atom::fz() const { return parent_->fz(index_); } + +inline int& Atom::atomicNumber() { return parent_->atomicNumber(index_); } +inline const int& Atom::atomicNumber() const { return parent_->atomicNumber(index_); } + +inline std::ostream& operator<<(std::ostream& os, const Atom& a) { + os << "[Atom idx=" << a.index_ + << " Z=" << a.atomicNumber() + << " pos=(" << a.x() << "," << a.y() << "," << a.z() << ") " + << "force=(" << a.fx() << "," << a.fy() << "," << a.fz() << ")]"; + return os; +} + +} // namespace mlip +} // namespace tadah + +#endif // TADAH_MLIP_ATOM_H + diff --git a/include/tadah/mlip/dataset_readers/castep_castep_reader.h b/include/tadah/mlip/dataset_readers/castep_castep_reader.h index 794a1144c29a4ed44c7c5abf65a88e26dc90f68c..86c2691d2ec7ba1a24f3f6c37dc8b56763a578fd 100644 --- a/include/tadah/mlip/dataset_readers/castep_castep_reader.h +++ b/include/tadah/mlip/dataset_readers/castep_castep_reader.h @@ -10,6 +10,9 @@ #include <sstream> #include <vector> #include <stdexcept> +namespace tadah { +namespace mlip { + /** * @class CastepCastepReader * @brief A class for reading and parsing CASTEP .castep files. @@ -48,16 +51,20 @@ class CastepCastepReader : public DatasetReader { public: /** * @brief Constructs a CastepCastepReader with a StructureDB reference. - * @param db Reference to a StructureDB object for storing parsed data. */ - CastepCastepReader(StructureDB& db); + CastepCastepReader(); /** - * @brief Constructs a CastepCastepReader and reads the specified file. - * @param db Reference to a StructureDB object for storing parsed data. - * @param filename Name of the .castep file to read. + * @brief A simple constructor referencing a DB, plus file index, plus # of structures. + * @param db Reference to HPC DB (already sized for this file). + * @param filename Name of the .castep file. + * @param fileIndex Which file index in the DB to fill. + * @param nStructsInFile How many structures belong to this file in the DB. */ - CastepCastepReader(StructureDB& db, const std::string& filename); + CastepCastepReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile); /** * @brief Reads data from a specified .castep file. @@ -76,6 +83,17 @@ public: virtual void print_summary() const override; virtual std::string get_summary() const override; + /** + * @brief Pass-1 scan of a .castep file to count how many structures + * appear, and how many atoms each structure has. + * + * @param filename The .castep file to scan. + * @param nStructures [out] Will be set to the number of structures found. + * @param atomsPerStructure [out] Will be filled with the number of atoms in each structure. + */ + void pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) override; protected: std::string raw_data_; /**< Stores raw file data */ std::string filename_; /**< Filename of the .castep file */ @@ -84,6 +102,8 @@ protected: double p_conv = 0.00624150913; /**< Conversion factor from GPa to eV/A^3 */ }; +} +} #endif // CASTEP_CASTEP_READER_H diff --git a/include/tadah/mlip/dataset_readers/castep_geom_reader.h b/include/tadah/mlip/dataset_readers/castep_geom_reader.h index f44dc7231d6e6abad416b5f22ea01ab3893d94c4..7cfac1779bb06475e552ee71a817c7967436d167 100644 --- a/include/tadah/mlip/dataset_readers/castep_geom_reader.h +++ b/include/tadah/mlip/dataset_readers/castep_geom_reader.h @@ -11,6 +11,9 @@ #include <vector> #include <stdexcept> +namespace tadah { +namespace mlip { + /** * @class CastepGeomReader * @brief A class for reading and parsing CASTEP .geom files. @@ -38,17 +41,20 @@ class CastepGeomReader : public CastepMDReader { public: /** * @brief Constructs a CastepGeomReader with a StructureDB reference. - * @param db Reference to a StructureDB object for storing parsed data. */ - CastepGeomReader(StructureDB& db); + CastepGeomReader(); /** - * @brief Constructs a CastepGeomReader and reads the specified file. - * @param db Reference to a StructureDB object for storing parsed data. - * @param filename Name of the .geom file to read. + * @brief A simple constructor referencing a DB, plus file index, plus # of structures. + * @param db Reference to HPC DB (already sized for this file). + * @param filename Name of the .castep file. + * @param fileIndex Which file index in the DB to fill. + * @param nStructsInFile How many structures belong to this file in the DB. */ - CastepGeomReader(StructureDB& db, const std::string& filename); - + CastepGeomReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile); private: /** @@ -61,6 +67,8 @@ private: std::string get_label(std::string &) override; }; +} +} #endif // CASTEP_GEOM_READER_H diff --git a/include/tadah/mlip/dataset_readers/castep_md_reader.h b/include/tadah/mlip/dataset_readers/castep_md_reader.h index 8ef93cff2537900fe1d137b8bc354d7cdc315a6f..d482fa439c8884a297aba62fea97b9dfe0236eab 100644 --- a/include/tadah/mlip/dataset_readers/castep_md_reader.h +++ b/include/tadah/mlip/dataset_readers/castep_md_reader.h @@ -11,6 +11,10 @@ #include <vector> #include <stdexcept> +namespace tadah { +namespace mlip { + + /** * @class CastepMDReader * @brief A class for reading and parsing CASTEP .md files. @@ -41,16 +45,20 @@ class CastepMDReader : public DatasetReader { public: /** * @brief Constructs a CastepMDReader with a StructureDB reference. - * @param db Reference to a StructureDB object for storing parsed data. */ - CastepMDReader(StructureDB& db); + CastepMDReader(); /** - * @brief Constructs a CastepMDReader and reads the specified file. - * @param db Reference to a StructureDB object for storing parsed data. - * @param filename Name of the .md file to read. + * @brief A simple constructor referencing a DB, plus file index, plus # of structures. + * @param db Reference to HPC DB (already sized for this file). + * @param filename Name of the .castep file. + * @param fileIndex Which file index in the DB to fill. + * @param nStructsInFile How many structures belong to this file in the DB. */ - CastepMDReader(StructureDB& db, const std::string& filename); + CastepMDReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile); /** * @brief Reads data from a specified .md file. @@ -77,9 +85,20 @@ public: */ bool ends_with(const std::string& str, const std::string& suffix); + /** + * @brief Pass-1 scan of a .castep file to count how many structures + * appear, and how many atoms each structure has. + * + * @param filename The .castep file to scan. + * @param nStructures [out] Will be set to the number of structures found. + * @param atomsPerStructure [out] Will be filled with the number of atoms in each structure. + */ + void pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) override; + protected: std::string raw_data_; /**< Stores raw file data */ - std::string filename_; /**< Filename of the .md file */ // Unit conversion factors double e_conv = 27.211386245988; /**< Conversion factor from Hartree to eV */ @@ -103,7 +122,7 @@ protected: * @brief Performs post-processing on the given structure. * @param s Reference to a Structure object. */ - void postproc_structure(Structure &s); + void postproc_structure(Structure *s, size_t &structIdxInFile); double T = 0; /**< Temperature in atomic units (Hartree/k_B) */ bool stress_tensor_bool = false; /**< Indicates presence of a stress tensor */ @@ -113,6 +132,8 @@ private: virtual std::string get_label(std::string &) override; }; +} +} #endif // CASTEP_MD_READER_H diff --git a/include/tadah/mlip/dataset_readers/dataset_reader.h b/include/tadah/mlip/dataset_readers/dataset_reader.h index a83ab7e012fdfd8d4a1ef77edad4ad0b6d9ae529..eff10d27352fe98f93fcc789e985c63c563644e9 100644 --- a/include/tadah/mlip/dataset_readers/dataset_reader.h +++ b/include/tadah/mlip/dataset_readers/dataset_reader.h @@ -1,10 +1,18 @@ #ifndef DATASET_READER_H #define DATASET_READER_H +#include <tadah/mlip/structure.h> +#include <tadah/mlip/structure_inl.h> #include <tadah/mlip/structure_db.h> +#include <tadah/core/vec3d_soa_view.h> // positionView(), forceView() +#include <tadah/core/vec3d_soa_view_math_inl.h> // for transformBy(...) #include <string> #include <vector> +namespace tadah { +namespace mlip { + + /** * @class DatasetReader * @brief Abstract base class for reading and parsing dataset files. @@ -36,7 +44,7 @@ public: * @brief Parses the read data. * * This pure virtual function must be implemented by derived classes - * to define how the data is parsed. + * to define how the data is parsed and stored (e.g., in a StructureDB). */ virtual void parse_data() = 0; @@ -45,42 +53,80 @@ public: * * This pure virtual function must be implemented by derived classes * to provide a summary of the dataset. - * - * @param stdb Reference to a StructureDB object for data summary. */ virtual void print_summary() const = 0; /** - * @brief Returns a summary of the data. + * @brief Returns a summary of the data as a string. * * This pure virtual function must be implemented by derived classes - * to provide a summary of the dataset. + * to provide details about the dataset contents. * - * @param stdb Reference to a StructureDB object for data summary. + * @return A string containing summary information for the dataset. */ virtual std::string get_summary() const = 0; /** - * @brief Constructor initializing the reference. + * @brief First-pass count of structures and their atom counts within a file. * - * @param db Reference to a StructureDB object to store parsed data. + * Derived classes must implement this method to scan the file and + * identify how many structures it contains, as well as the number of + * atoms per structure. The goal is to gather all necessary information + * to allocate data structures prior to a detailed parse pass. + * + * @param filename The path to the dataset file to be scanned. + * @param nStructures [out] Will be filled with the total number of structures found. + * @param atomsPerStructure [out] Will be resized and filled such that + * each element corresponds to the number of + * atoms in the respective structure. + * + * @note This is a pure virtual function, so each derived reader must + * implement its own logic for identifying structures and atom + * counts in the file format it supports. */ - DatasetReader(StructureDB& db) : stdb(db) {} + virtual void pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) = 0; /** - * @brief Constructor initializing the reference and filename. - * - * @param db Reference to a StructureDB object to store parsed data. - * @param filename The name of the file to read data from. + * @brief Constructor initializing the reference to StructureDB and filename. */ - DatasetReader(StructureDB& db, const std::string& filename) - : stdb(db), filename(filename) {} + DatasetReader() + : filename(""), fileIndex_(0), nStructsInFile_(0) {} + /** + * @brief Updated constructor referencing StructureDB, specifying file index and # of structures. + * + * This variant passes in the file index and how many structures are + * allocated in the DB for that file. Derived readers can thus fill + * those Subrange(s) in parse_data() without calling add(). + * + * @param db Pointer to the HPC StructureDB (already sized). + * @param filename Name of the dataset file. + * @param fileIndex Which file index this reader corresponds to. + * @param nStructsInFile How many structures are allocated for this file in \p db. + */ + DatasetReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile) + : stdb(db), + filename(filename), + fileIndex_(fileIndex), + nStructsInFile_(nStructsInFile) + {} protected: - StructureDB& stdb; /**< Reference to store parsed data. */ - std::string filename; /**< Filename for data reading. */ + StructureDB* stdb; ///< Reference to HPC DB + std::string filename; ///< Name of the file being read + std::size_t fileIndex_{0}; ///< Which file index this reader is for + std::size_t nStructsInFile_{0}; ///< How many structures the DB allocated for this file + private: virtual std::string get_first_label(std::string &); virtual std::string get_label(std::string &); }; +} +} + #endif // DATASET_READER_H + diff --git a/include/tadah/mlip/dataset_readers/dataset_reader_selector.h b/include/tadah/mlip/dataset_readers/dataset_reader_selector.h index a09803ecc532770a28e191be52666ad02897b69a..503b41048ce186833c8ed77c75c8cb937b904184 100644 --- a/include/tadah/mlip/dataset_readers/dataset_reader_selector.h +++ b/include/tadah/mlip/dataset_readers/dataset_reader_selector.h @@ -6,6 +6,10 @@ #include <string> #include <memory> + +namespace tadah { +namespace mlip { + /** * @class DatasetReaderSelector * @brief Selects the appropriate DatasetReader based on file type. @@ -15,11 +19,24 @@ public: /** * @brief Factory method to create specific DatasetReader objects. * + * The correct DatasetReader object is determined based on the file type. + * * @param filepath File path to check the content. - * @param db Reference to a StructureDB object to store parsed data. + * @param db Pointer to a StructureDB object to store parsed data. * @return A unique pointer to a DatasetReader object. */ - static std::unique_ptr<DatasetReader> get_reader(const std::string& filepath, StructureDB& db); + static std::unique_ptr<DatasetReader> get_reader(StructureDB* db, + const std::string& filepath, + size_t fileIndex, + size_t nStructsInFile); + /** + * @brief Factory method to create specific DatasetReader objects. + * + * The correct DatasetReader object is determined based on the file type. + * + * @param filepath File path to check the content. + */ + static std::unique_ptr<DatasetReader> get_reader(const std::string& filepath); /** * @brief Determines the file type based on its content. @@ -30,5 +47,7 @@ public: static std::string determine_file_type_by_content(const std::string& filepath); }; +} +} #endif // DATASET_READER_SELECTOR_H diff --git a/include/tadah/mlip/dataset_readers/tadah_reader.h b/include/tadah/mlip/dataset_readers/tadah_reader.h new file mode 100644 index 0000000000000000000000000000000000000000..c05434859dba21768a069ecbcdfcd235cc2b16cb --- /dev/null +++ b/include/tadah/mlip/dataset_readers/tadah_reader.h @@ -0,0 +1,109 @@ +#ifndef TADAH_READER_H +#define TADAH_READER_H + +#include "dataset_reader.h" +#include <tadah/mlip/dataset_reader.h> +#include <tadah/mlip/structure.h> +#include <tadah/mlip/structure_inl.h> +#include <tadah/mlip/structure_db.h> +#include <string> +#include <vector> + +namespace tadah { +namespace mlip { + +/** + * @class TadahReader + * + * This class is a concrete implementation of the DatasetReader interface + * to read and parse data from a Tadah! dataset file. + * + * Provides functionality to count structures and atoms in a file + * without parsing the full data. + * + */ +class TadahReader: public DatasetReader { +public: + /** + * @brief Virtual destructor. + * + * Ensures proper cleanup of derived class objects. + */ + virtual ~TadahReader() = default; + + /** + * @brief Reads data from a specified file. + * + * @param filename The name of the file to read data from. + */ + virtual void read_data(const std::string& filename) override; + + /** + * @brief Parses the read data. + * + */ + virtual void parse_data() override; + /** + * @brief Prints a summary of the data. + * + */ + virtual void print_summary() const override; + + /** + * @brief Returns a summary of the data as a string. + * + * @return A string containing summary information for the dataset. + */ + virtual std::string get_summary() const override; + + /** + * @brief First-pass count of structures and their atom counts within a file. + * + * This function scans the dataset file to identify how many structures + * it contains, as well as the number of atoms per structure. + * The goal is to gather all necessary information + * to allocate data structures prior to a detailed parse pass. + * + * @param filename The path to the dataset file to be scanned. + * @param nStructures [out] Will be filled with the total number of structures found. + * @param atomsPerStructure [out] Will be resized and filled such that + * each element corresponds to the number of + * atoms in the respective structure. + */ + virtual void pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) override; + + /** + * @brief Constructor initializing the reference to StructureDB and filename. + * + * @param db Reference to a StructureDB object to store parsed data. + * @param filename The name of the file to read data from. + */ + TadahReader(): DatasetReader() {} + + /** + * @brief Updated constructor referencing StructureDB, specifying file index and # of structures. + * + * This variant passes in the file index and how many structures are + * allocated in the DB for that file. Derived readers can thus fill + * those Subrange(s) in parse_data() without calling add(). + * + * @param db Reference to the HPC StructureDB (already sized). + * @param filename Name of the dataset file. + * @param fileIndex Which file index this reader corresponds to. + * @param nStructsInFile How many structures are allocated for this file in \p db. + */ + TadahReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile) + : DatasetReader(db,filename,fileIndex,nStructsInFile) {} + +}; +} +} + +#endif // TADAH_READER_H + + diff --git a/include/tadah/mlip/dataset_readers/vasp_outcar_reader.h b/include/tadah/mlip/dataset_readers/vasp_outcar_reader.h index 34c861a26cc041f7331a3e2bcbea0211a2150ff8..bd7fa5f14d92b6f124ad84e0fb7ba6320ef11841 100644 --- a/include/tadah/mlip/dataset_readers/vasp_outcar_reader.h +++ b/include/tadah/mlip/dataset_readers/vasp_outcar_reader.h @@ -5,6 +5,10 @@ #include <tadah/mlip/dataset_readers/dataset_reader.h> #include <string> + +namespace tadah { +namespace mlip { + /** * @class VaspOutcarReader * @brief Concrete class for reading and parsing VASP OUTCAR files. @@ -31,9 +35,8 @@ public: /** * @brief Constructor initializing base class reference. * - * @param db Reference to a StructureDB object to store parsed data. */ - VaspOutcarReader(StructureDB& db); + VaspOutcarReader(); /** * @brief Constructor that initializes and reads from a file. @@ -43,7 +46,10 @@ public: * @param db Reference to a StructureDB object to store parsed data. * @param filename The name of the OUTCAR file to read data from. */ - VaspOutcarReader(StructureDB& db, const std::string& filename); + VaspOutcarReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile); /** * @brief Reads data from the specified OUTCAR file. @@ -71,10 +77,22 @@ public: virtual void print_summary() const override; virtual std::string get_summary() const override; + /** + * @brief Pass-1 scan of a .castep file to count how many structures + * appear, and how many atoms each structure has. + * + * @param filename The .castep file to scan. + * @param nStructures [out] Will be set to the number of structures found. + * @param atomsPerStructure [out] Will be filled with the number of atoms in each structure. + */ + void pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) override; private: std::string raw_data_; // Stores raw file data - std::string filename_; // Stores raw file data double s_conv = 6.241509074e-4; // kbar -> eV/A^3 }; +} +} #endif // VASP_OUTCAR_READER_H diff --git a/include/tadah/mlip/dataset_readers/vasp_vasprun_reader.h b/include/tadah/mlip/dataset_readers/vasp_vasprun_reader.h index b43e279f822909b7a0660d0c5d1274824f863384..73eef8b0e9252e69a61b31b96aa35b3e2574a474 100644 --- a/include/tadah/mlip/dataset_readers/vasp_vasprun_reader.h +++ b/include/tadah/mlip/dataset_readers/vasp_vasprun_reader.h @@ -11,6 +11,10 @@ namespace rx = rapidxml; + +namespace tadah { +namespace mlip { + /** * @class VaspVasprunReader * @brief Concrete class for reading and parsing VASP vasprun.xml files. @@ -37,9 +41,8 @@ public: /** * @brief Constructor initializing with a StructureDB reference. * - * @param stdb Reference to a StructureDB object for storing parsed data. */ - VaspVasprunReader(StructureDB& stdb); + VaspVasprunReader(); /** * @brief Constructor that initializes and reads from a file. @@ -49,7 +52,10 @@ public: * @param stdb Reference to a StructureDB object. * @param filename Name of the vasprun.xml file to read. */ - VaspVasprunReader(StructureDB& stdb, const std::string& filename); + VaspVasprunReader(StructureDB* stdb, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile); /** * @brief Destructor for VaspVasprunReader. @@ -87,6 +93,17 @@ public: virtual void print_summary() const override; virtual std::string get_summary() const override; + /** + * @brief Pass-1 scan of a .castep file to count how many structures + * appear, and how many atoms each structure has. + * + * @param filename The .castep file to scan. + * @param nStructures [out] Will be set to the number of structures found. + * @param atomsPerStructure [out] Will be filled with the number of atoms in each structure. + */ + void pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) override; protected: /** * @brief Extracts the number of atoms in the current structure. @@ -155,12 +172,13 @@ protected: rx::xml_document<> doc; ///< XML document object for parsing. rx::file<> *xmlFile = nullptr; ///< Optional pointer for file management. std::vector<std::string> atom_types; ///< List of atom types; length is the number of atoms. - StructureDB& stdb; ///< Reference to the StructureDB for storing data. private: Structure _s; ///< Internal structure representation. bool stress_tensor_bool = false; ///< Flag indicating stress tensor presence. double s_conv = 6.241509074e-4; // kbar -> eV/A^3 }; +} +} #endif // VASP_VASPRUN_READER_H diff --git a/include/tadah/mlip/dataset_writers/castep_cell_writer.h b/include/tadah/mlip/dataset_writers/castep_cell_writer.h index 90b799c4c70c0be8ad1748098765c4d9f5d50157..fbe4fe1f1c81cb960ca6af438e5fb71a7a87ccfb 100644 --- a/include/tadah/mlip/dataset_writers/castep_cell_writer.h +++ b/include/tadah/mlip/dataset_writers/castep_cell_writer.h @@ -10,13 +10,17 @@ #include <sstream> #include <vector> #include <stdexcept> +namespace tadah { +namespace mlip { class CastepCellWriter : public DatasetWriter { public: - CastepCellWriter(StructureDB& db); + CastepCellWriter(); - virtual void write_data(const std::string& filename, const size_t i) override; + virtual void write_data(const StructureDB& stdb, const std::string& filename, size_t structIndex) override; }; +} +} #endif // CASTEP_CELL_WRITER_H diff --git a/include/tadah/mlip/dataset_writers/dataset_writer.h b/include/tadah/mlip/dataset_writers/dataset_writer.h index 91ff858df6c7f77b083a654440ed371e224525be..c4ee1f85b4d81f3cfe85e9391321883bf3b726b4 100644 --- a/include/tadah/mlip/dataset_writers/dataset_writer.h +++ b/include/tadah/mlip/dataset_writers/dataset_writer.h @@ -1,23 +1,32 @@ #ifndef DATASET_WRITER_H #define DATASET_WRITER_H +#include <cstddef> #include <tadah/mlip/structure_db.h> +#include <tadah/mlip/structure.h> +#include <tadah/mlip/structure_inl.h> #include <string> -#include <vector> + + +namespace tadah { +namespace mlip { class DatasetWriter { public: virtual ~DatasetWriter() = default; - virtual void write_data(const std::string& filename, const size_t i) = 0; + virtual void write_data(const StructureDB &stdb, + const std::string& filename, + size_t structIndex) = 0; - DatasetWriter(StructureDB& db) : stdb(db) {}; + DatasetWriter() {}; virtual void set_precision(const size_t _p) { p = _p; w = p+6; }; protected: - StructureDB& stdb; double p = 10; // output precision double w = p+6; // column width }; +} +} #endif // DATASET_WRITER_H diff --git a/include/tadah/mlip/dataset_writers/dataset_writer_selector.h b/include/tadah/mlip/dataset_writers/dataset_writer_selector.h index 7ea4f85e78bec210bd6ee9a92347e6899026734b..806aa1dda4c8709e4c22d9dc6d2eda816191437b 100644 --- a/include/tadah/mlip/dataset_writers/dataset_writer_selector.h +++ b/include/tadah/mlip/dataset_writers/dataset_writer_selector.h @@ -6,10 +6,15 @@ #include <string> #include <memory> + +namespace tadah { +namespace mlip { + class DatasetWriterSelector { public: - static std::unique_ptr<DatasetWriter> get_writer(const std::string& type, StructureDB& db); - + static std::unique_ptr<DatasetWriter> get_writer(std::string type); }; +} +} #endif // DATASET_WRITER_SELECTOR_H diff --git a/include/tadah/mlip/dataset_writers/lammps_structure_writer.h b/include/tadah/mlip/dataset_writers/lammps_structure_writer.h index 5ee539aea758bf972b33262af856c830ce241846..aefb476cb3db7746f28d8cc82bd86b0b033db61b 100644 --- a/include/tadah/mlip/dataset_writers/lammps_structure_writer.h +++ b/include/tadah/mlip/dataset_writers/lammps_structure_writer.h @@ -10,12 +10,18 @@ #include <sstream> #include <vector> #include <stdexcept> + +namespace tadah { +namespace mlip { + class LammpsStructureWriter : public DatasetWriter { public: - LammpsStructureWriter(StructureDB& db); + LammpsStructureWriter(); - virtual void write_data(const std::string& filename, const size_t i) override; + virtual void write_data(const StructureDB& db, const std::string& filename, size_t structIndex) override; }; +} +} #endif // LAMMPS_STRUCTURE_WRITER_H diff --git a/include/tadah/mlip/dataset_writers/vasp_poscar_writer.h b/include/tadah/mlip/dataset_writers/vasp_poscar_writer.h index abd58b8b38aa34b1330e0c22703ff4d24c0df9e4..7fd11c3e9017d599e8ba31db9301ec9da7e02819 100644 --- a/include/tadah/mlip/dataset_writers/vasp_poscar_writer.h +++ b/include/tadah/mlip/dataset_writers/vasp_poscar_writer.h @@ -10,12 +10,18 @@ #include <sstream> #include <vector> #include <stdexcept> + +namespace tadah { +namespace mlip { + class VaspPoscarWriter : public DatasetWriter { public: - VaspPoscarWriter(StructureDB& db); + VaspPoscarWriter(); - virtual void write_data(const std::string& filename, const size_t i) override; + virtual void write_data(const StructureDB &db, const std::string& filename, size_t structIndex) override; }; +} +} #endif // VASP_POSCAR_WRITER_H diff --git a/include/tadah/mlip/descriptors_calc.h b/include/tadah/mlip/descriptors_calc.h index 14d1c4be9194f599cac14a0e27de32b9b63a8e1a..d4e9a77c918be467f1e873b1377aeb4d10996c11 100644 --- a/include/tadah/mlip/descriptors_calc.h +++ b/include/tadah/mlip/descriptors_calc.h @@ -12,6 +12,8 @@ #include <tadah/models/descriptors/dm/dm_base.h> #include <tadah/models/cutoffs.h> +namespace tadah { +namespace mlip { /** \brief Descriptors calculator * * This object does not store any data. @@ -122,6 +124,8 @@ class DescriptorsCalc: public DC_Base { void calc_dimer(const Structure &st, StDescriptors &std); void common_constructor(); }; +} +} #include "descriptors_calc.hpp" #endif diff --git a/include/tadah/mlip/descriptors_calc.hpp b/include/tadah/mlip/descriptors_calc.hpp index b5bc9b1e16a82cf2d6cf2e0822f3a9ba0a453795..f7e876f565aa9a68dfdf80a712b0c052b68b7378 100644 --- a/include/tadah/mlip/descriptors_calc.hpp +++ b/include/tadah/mlip/descriptors_calc.hpp @@ -7,6 +7,8 @@ #include <cstdio> +namespace tadah { +namespace mlip { template <typename D2, typename D3, typename DM, typename C2, typename C3, typename CM> template <typename T1, typename T2, typename T3> DescriptorsCalc<D2,D3,DM,C2,C3,CM>::DescriptorsCalc(Config &c, T1 &t1, T2 &t2, T3 &t3): @@ -445,4 +447,6 @@ StDescriptorsDB DescriptorsCalc<D2,D3,DM,C2,C3,CM>::calc(const StructureDB &stdb calc(stdb(i),st_desc_db(i)); return st_desc_db; } +} +} #endif diff --git a/include/tadah/mlip/descriptors_calc_base.h b/include/tadah/mlip/descriptors_calc_base.h index a56c26f328d261161e21688d844aad0e68c90ddf..d1b901a151f34a53273866c644bfcf23a281b9b5 100644 --- a/include/tadah/mlip/descriptors_calc_base.h +++ b/include/tadah/mlip/descriptors_calc_base.h @@ -6,10 +6,14 @@ #include <tadah/mlip/st_descriptors.h> #include <tadah/mlip/st_descriptors_db.h> +namespace tadah { +namespace mlip { class DC_Base { public: virtual ~DC_Base() {}; virtual StDescriptors calc(const Structure &) { return StDescriptors(); }; virtual StDescriptorsDB calc(const StructureDB &) {return StDescriptorsDB();}; }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/design_matrix.h b/include/tadah/mlip/design_matrix/design_matrix.h index c9c02cb42c38f7da8e66b6da196243f98f3873e1..b98239c66056dbd8a30e9594fbb8afe12a658a39 100644 --- a/include/tadah/mlip/design_matrix/design_matrix.h +++ b/include/tadah/mlip/design_matrix/design_matrix.h @@ -12,6 +12,8 @@ #include <stdexcept> +namespace tadah { +namespace mlip { class DesignMatrixBase { public: @@ -349,4 +351,6 @@ private: config.add("SSTDEV",s_std_dev[j]); } }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_all.h b/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_all.h index 48179cf65d71ab9692485bdc246206e5417910b1..791be6e9fc17e0c93257fd3b7019bea3ca9a36e7 100644 --- a/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_all.h +++ b/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_all.h @@ -1,2 +1,6 @@ #include <tadah/mlip/design_matrix/functions/basis_functions/dm_bf_linear.h> #include <tadah/mlip/design_matrix/functions/basis_functions/dm_bf_polynomial2.h> +namespace tadah { +namespace mlip { +} +} diff --git a/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_base.h b/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_base.h index 6f6187dbc2fffd1d7b799e33a1765d4db11a3fe1..8dc9cc4f46469270a69e71dfbc9ad0555364deb1 100644 --- a/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_base.h +++ b/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_base.h @@ -9,6 +9,8 @@ #include <iostream> +namespace tadah { +namespace mlip { struct DM_BF_Base: public DM_Function_Base, public virtual BF_Base { DM_BF_Base(); @@ -22,4 +24,6 @@ struct DM_BF_Base: public DM_Function_Base, public virtual BF_Base { // virtual void calc_phi_stress_rows(phi_type &Phi, size_t &row, // const double fac[6], const Structure &st, const StDescriptors &st_d)=0; }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_linear.h b/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_linear.h index e0052c373aad55baa891fc67e6476dccedacbda0..6fa887d61e8173a48f60ce29410128b20cf7696f 100644 --- a/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_linear.h +++ b/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_linear.h @@ -4,6 +4,8 @@ #include <tadah/mlip/design_matrix/functions/basis_functions/dm_bf_base.h> #include <tadah/models/functions/basis_functions/bf_linear.h> +namespace tadah { +namespace mlip { struct DM_BF_Linear: public DM_BF_Base, public BF_Linear { DM_BF_Linear(); @@ -16,4 +18,6 @@ struct DM_BF_Linear: public DM_BF_Base, public BF_Linear void calc_phi_stress_rows(phi_type &Phi, size_t &row, const double fac[6], const Structure &st, const StDescriptors &st_d) override; }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_polynomial2.h b/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_polynomial2.h index 550f4038e5cbba6b1b240b2c21a60396f630d69d..bda5f5a593d4915c0a238173c1ef389ca4f7ee19 100644 --- a/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_polynomial2.h +++ b/include/tadah/mlip/design_matrix/functions/basis_functions/dm_bf_polynomial2.h @@ -4,6 +4,8 @@ #include <tadah/mlip/design_matrix/functions/basis_functions/dm_bf_base.h> #include <tadah/models/functions/basis_functions/bf_polynomial2.h> +namespace tadah { +namespace mlip { struct DM_BF_Polynomial2: public DM_BF_Base, public BF_Polynomial2 { DM_BF_Polynomial2(); @@ -16,4 +18,6 @@ struct DM_BF_Polynomial2: public DM_BF_Base, public BF_Polynomial2 void calc_phi_stress_rows(phi_type &Phi, size_t &row, const double fac[6], const Structure &st, const StDescriptors &st_d) override; }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/dm_f_all.h b/include/tadah/mlip/design_matrix/functions/dm_f_all.h index 56c4baed80353ca9d691d8a94594b605a46f8143..864a56f2a2253204c730cc5677408aa985768306 100644 --- a/include/tadah/mlip/design_matrix/functions/dm_f_all.h +++ b/include/tadah/mlip/design_matrix/functions/dm_f_all.h @@ -1,2 +1,6 @@ #include <tadah/mlip/design_matrix/functions/basis_functions/dm_bf_all.h> #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_all.h> +namespace tadah { +namespace mlip { +} +} diff --git a/include/tadah/mlip/design_matrix/functions/dm_function_base.h b/include/tadah/mlip/design_matrix/functions/dm_function_base.h index 06f53215d403f1e4ca665ab93dcddc698ab90a7d..49e92bc91b0ac88d9dafc671cd6b7e72a4fa0a80 100644 --- a/include/tadah/mlip/design_matrix/functions/dm_function_base.h +++ b/include/tadah/mlip/design_matrix/functions/dm_function_base.h @@ -13,6 +13,8 @@ #include <limits> #include <vector> +namespace tadah { +namespace mlip { /** Base class for Kernels and Basis Functions */ struct DM_Function_Base: public virtual Function_Base { @@ -29,6 +31,8 @@ struct DM_Function_Base: public virtual Function_Base { virtual void calc_phi_stress_rows(phi_type &, size_t &, const double[6], const Structure &, const StDescriptors &)=0; }; +} +} //template<> inline CONFIG::Registry<DM_Function_Base>::Map CONFIG::Registry<DM_Function_Base>::registry{}; //template<> inline CONFIG::Registry<DM_Function_Base,Config&>::Map CONFIG::Registry<DM_Function_Base,Config&>::registry{}; #endif diff --git a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_all.h b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_all.h index b53a2f63d862fecb09f6410c744ee549d13c5784..91d83185d7b704a0ea01a702f0c1717f6568aa1b 100644 --- a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_all.h +++ b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_all.h @@ -5,3 +5,7 @@ #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_polynomial.h> #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_sigmoid.h> #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_quadratic.h> +namespace tadah { +namespace mlip { +} +} diff --git a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h index 8a12489c2eaead83a3a64915cf65cea398045fc3..5bc532c7ea2a5a30536690fe1967590588226552 100644 --- a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h +++ b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h @@ -9,6 +9,8 @@ #include <tadah/models/functions/kernels/kern_base.h> #include <iostream> +namespace tadah { +namespace mlip { /** \brief Abstract class to be used as a base for all kernels. * @@ -29,4 +31,6 @@ class DM_Kern_Base: public DM_Function_Base, public virtual Kern_Base { virtual void calc_phi_stress_rows(phi_type &Phi, size_t &row, const double fac[6], const Structure &st, const StDescriptors &st_d) override; }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_linear.h b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_linear.h index cf026d05cf1baf954868d897af9c3158782315f5..fcad6b9b14e08a833cb1b2410935fb147c438385 100644 --- a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_linear.h +++ b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_linear.h @@ -3,6 +3,8 @@ #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h> #include <tadah/models/functions/kernels/kern_linear.h> +namespace tadah { +namespace mlip { /** * Linear kernel also knows as dot product kernel * @@ -27,4 +29,6 @@ class DM_Kern_Linear : public DM_Kern_Base, public Kern_Linear { void calc_phi_stress_rows(phi_type &Phi, size_t &row, const double fac[6], const Structure &st, const StDescriptors &st_d) override; }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_lq.h b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_lq.h index f0b44451955e6c2e1f7f592397d318d2599fa31b..704f25aac835da955af64453eae73b39f4378686 100644 --- a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_lq.h +++ b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_lq.h @@ -4,9 +4,13 @@ #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h> #include <tadah/models/functions/kernels/kern_lq.h> +namespace tadah { +namespace mlip { class DM_Kern_LQ : public DM_Kern_Base, public Kern_LQ { public: DM_Kern_LQ(); DM_Kern_LQ(const Config &c); }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_polynomial.h b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_polynomial.h index 5094370338d006531c85b4450a81878d7fcb6653..ed8b4b47c7e078356356524f5f1c8527ddbc6386 100644 --- a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_polynomial.h +++ b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_polynomial.h @@ -4,9 +4,13 @@ #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h> #include <tadah/models/functions/kernels/kern_polynomial.h> +namespace tadah { +namespace mlip { class DM_Kern_Polynomial : public DM_Kern_Base, public Kern_Polynomial { public: DM_Kern_Polynomial(); DM_Kern_Polynomial(const Config &c); }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_quadratic.h b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_quadratic.h index ecb85350cdc42c698547432ae1965124bd6fcde5..5d977111c722cf30d29e2722fffb589a2c366d82 100644 --- a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_quadratic.h +++ b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_quadratic.h @@ -4,9 +4,13 @@ #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h> #include <tadah/models/functions/kernels/kern_quadratic.h> +namespace tadah { +namespace mlip { class DM_Kern_Quadratic : public DM_Kern_Base, public Kern_Quadratic { public: DM_Kern_Quadratic(); DM_Kern_Quadratic(const Config &c); }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_rbf.h b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_rbf.h index 7e1251afe93a266366024cf5d22a43869287d84a..d68c1141f652d865b2665c014890f99b55b540b2 100644 --- a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_rbf.h +++ b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_rbf.h @@ -4,9 +4,13 @@ #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h> #include <tadah/models/functions/kernels/kern_rbf.h> +namespace tadah { +namespace mlip { class DM_Kern_RBF : public DM_Kern_Base, public Kern_RBF { public: DM_Kern_RBF(); DM_Kern_RBF(const Config &c); }; +} +} #endif diff --git a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_sigmoid.h b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_sigmoid.h index c62a50ef586e4dd76390d5d710deec129c679d46..75c47a0b400bb85eb75cb581b513ce877db64278 100644 --- a/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_sigmoid.h +++ b/include/tadah/mlip/design_matrix/functions/kernels/dm_kern_sigmoid.h @@ -4,9 +4,13 @@ #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h> #include <tadah/models/functions/kernels/kern_sigmoid.h> +namespace tadah { +namespace mlip { class DM_Kern_Sigmoid : public DM_Kern_Base, public Kern_Sigmoid { public: DM_Kern_Sigmoid(); DM_Kern_Sigmoid(const Config &c); }; +} +} #endif diff --git a/include/tadah/mlip/mlip_context.h b/include/tadah/mlip/mlip_context.h new file mode 100644 index 0000000000000000000000000000000000000000..4376cee794542d8c9b5a9e0016b42ee18432400a --- /dev/null +++ b/include/tadah/mlip/mlip_context.h @@ -0,0 +1,167 @@ +#pragma once + +#include <memory> +#include <stdexcept> + +#include <tadah/mlip/structure_db.h> +#include <tadah/mlip/neighbor_list_db.h> +#include <tadah/mlip/neighbor_calc.h> + +namespace tadah { +namespace mlip { + +/** + * @class MLIPContext + * @brief Provides a central place to manage a StructureDB and its neighbor data. + * + * References a single StructureDB for coordinates and atomic information, + * and allocates or manages a NeighborListDB plus a NeighborCalc. + * Reduces overhead of tracking these data structures separately and simplifies + * neighbor-related operations, including potential future HPC or ML expansions. + */ +class MLIPContext +{ +public: + /** + * @brief Constructs the context from an existing StructureDB reference. + * + * Expects 'db' to remain valid for the entire lifetime of this context. + * Avoids copying large HPC arrays by referencing them directly. + */ + inline explicit MLIPContext(StructureDB &db) + : structDB_(db) + { + // Call setupNeighborData() to establish neighbor-related objects if needed. + } + + /** + * @brief Allocates and configures neighbor-related objects for HPC usage. + * + * Creates a NeighborListDB that references structDB_. + * Instantiates a NeighborCalc to populate HPC neighbor arrays if requested. + * Acceptable to call again to recreate or update these objects. + */ + inline void setupNeighborData() + { + nlistDB_ = std::make_unique<NeighborListDB>(structDB_); + ncalc_ = std::make_unique<NeighborCalc>(); + } + + /** + * @brief Performs neighbor-list building across the entire DB, using the stored NeighborCalc. + * + * Delegates to neighborCalc()->build(...), populating nlistDB_->nlist() from HPC SoA data. + * This design suits naive O(N^2) building or advanced algorithms if implemented in NeighborCalc. + * + * @param cutoff Distance cutoff for neighbor detection. + * + * @throws std::runtime_error if neighborListDB or neighborCalc has not been initialized. + */ + inline void buildNeighbors(double cutoff) + { + if (!nlistDB_) { + throw std::runtime_error( + "MLIPContext::buildNeighbors: NeighborListDB not available. " + "Call setupNeighborData() first." + ); + } + if (!ncalc_) { + throw std::runtime_error( + "MLIPContext::buildNeighbors: NeighborCalc not available. " + "Call setupNeighborData() first." + ); + } + + ncalc_->build( + nlistDB_->nlist(), + structDB_.X_.data(), + structDB_.X_.size(), + cutoff + ); + } + + /** + * @brief Returns a reference to StructureDB for reading or modifying HPC arrays. + */ + inline StructureDB& structureDB() + { + return structDB_; + } + + /** + * @brief Returns a read-only reference to the StructureDB. + */ + inline const StructureDB& structureDB() const + { + return structDB_; + } + + /** + * @brief Returns a reference to NeighborListDB for HPC neighbor queries or modifications. + * + * Valid after setupNeighborData() has been called. + */ + inline NeighborListDB& neighborListDB() + { + if (!nlistDB_) { + throw std::runtime_error( + "MLIPContext::neighborListDB(): Not initialized. " + "Call setupNeighborData() first." + ); + } + return *nlistDB_; + } + + /** + * @brief Returns a const reference to NeighborListDB for read-only neighbor operations. + */ + inline const NeighborListDB& neighborListDB() const + { + if (!nlistDB_) { + throw std::runtime_error( + "MLIPContext::neighborListDB() const: Not initialized. " + "Call setupNeighborData() first." + ); + } + return *nlistDB_; + } + + /** + * @brief Returns the stored NeighborCalc for custom or advanced neighbor-building steps. + * + * Valid after setupNeighborData() has been called. + * Replaces or enhances it if needed for advanced HPC or parallel approaches. + */ + inline NeighborCalc& neighborCalc() + { + if (!ncalc_) { + throw std::runtime_error( + "MLIPContext::neighborCalc(): Not initialized. " + "Call setupNeighborData() first." + ); + } + return *ncalc_; + } + +private: + /** + * Reference to the HPC structure database that holds SoA coordinates. Not owned here. + */ + StructureDB &structDB_; + + /** + * Holds a neighbor list referencing all atoms in structDB_, plus local->global mapping. + * Created in setupNeighborData(). + */ + std::unique_ptr<NeighborListDB> nlistDB_; + + /** + * Provides the actual logic for building or updating HPC neighbor arrays. + * May use O(N^2) or more advanced HPC methods. Created in setupNeighborData(). + */ + std::unique_ptr<NeighborCalc> ncalc_; +}; + +} // namespace mlip +} // namespace tadah + diff --git a/include/tadah/mlip/neighbor_calc.h b/include/tadah/mlip/neighbor_calc.h new file mode 100644 index 0000000000000000000000000000000000000000..e2e7e5962e78594d4155296f2a875c00e8048762 --- /dev/null +++ b/include/tadah/mlip/neighbor_calc.h @@ -0,0 +1,110 @@ +#pragma once + +#include "neighbor_list.h" +#include <vector> +#include <cmath> + +/** + * @file neighbor_calc.h + * @brief Demonstrates building a NeighborList using a naive O(N^2) approach. + * + * This example: + * - Loops over all pairs (i,j). + * - Checks if distance^2 < cutoff^2. + * - If true, adds them as neighbors in both directions (i->j, j->i). + * - Sets shift arrays to zero and uses mirrorIndex_ to store symmetrical offsets. + */ + +namespace tadah { +namespace mlip { + +/** + * @class NeighborCalc + * @brief Populates a NeighborList by scanning atom pairs for distances below cutoff. + */ +class NeighborCalc +{ +public: + /** + * @brief Build the neighbor list with a naive O(N^2) distance check. + * + * @param nlist The NeighborList to fill. + * @param coords Pointer to x,y,z coords in [3*nAtoms]. + * @param nAtoms Number of atoms in coords. + * @param cutoff Distance cutoff for neighbor inclusion. + */ + void build(NeighborList &nlist, + const double *coords, + std::size_t nAtoms, + double cutoff) + { + std::vector<std::size_t> counts(nAtoms, 0); + + for (std::size_t i = 0; i < nAtoms; ++i) { + for (std::size_t j = i + 1; j < nAtoms; ++j) { + double dx = coords[3*j + 0] - coords[3*i + 0]; + double dy = coords[3*j + 1] - coords[3*i + 1]; + double dz = coords[3*j + 2] - coords[3*i + 2]; + double r2 = dx*dx + dy*dy + dz*dz; + if (r2 < cutoff*cutoff) { + counts[i]++; + counts[j]++; + } + } + } + + std::size_t totalCount = 0; + for (auto c : counts) { + totalCount += c; + } + + nlist.resize(nAtoms, totalCount); + + std::size_t *pref = nlist.prefixData(); + pref[0] = 0; + for (std::size_t i = 0; i < nAtoms; i++) { + pref[i+1] = pref[i] + counts[i]; + } + + std::vector<std::size_t> writePos(nAtoms, 0); + + std::size_t *nbData = nlist.neighborData(); + int *sxData = nlist.shiftXData(); + int *syData = nlist.shiftYData(); + int *szData = nlist.shiftZData(); + std::size_t *mirrData = nlist.mirrorIndexData(); + + for (std::size_t i = 0; i < nAtoms; ++i) { + for (std::size_t j = i + 1; j < nAtoms; ++j) { + double dx = coords[3*j + 0] - coords[3*i + 0]; + double dy = coords[3*j + 1] - coords[3*i + 1]; + double dz = coords[3*j + 2] - coords[3*i + 2]; + double r2 = dx*dx + dy*dy + dz*dz; + + if (r2 < cutoff*cutoff) { + std::size_t offI = pref[i] + writePos[i]; + nbData[offI] = j; + sxData[offI] = 0; + syData[offI] = 0; + szData[offI] = 0; + + std::size_t offJ = pref[j] + writePos[j]; + nbData[offJ] = i; + sxData[offJ] = 0; + syData[offJ] = 0; + szData[offJ] = 0; + + mirrData[offI] = offJ; + mirrData[offJ] = offI; + + writePos[i]++; + writePos[j]++; + } + } + } + } +}; + +} // end namespace mlip +} // end namespace tadah + diff --git a/include/tadah/mlip/neighbor_list.h b/include/tadah/mlip/neighbor_list.h new file mode 100644 index 0000000000000000000000000000000000000000..b16c8d04cc78cc942ec28c10bdcfa3b4f40f9e4a --- /dev/null +++ b/include/tadah/mlip/neighbor_list.h @@ -0,0 +1,131 @@ +#pragma once + +#include <vector> +#include <cstddef> +#include <stdexcept> + +/** + * @brief Stores a SoA neighbor list, including offsets for each atom and shift information. + * + * This class maintains: + * - prefixSum_ for offsets + * - neighbors_ for flattened neighbor indices + * - shiftX_, shiftY_, shiftZ_ for periodic or other shifts + * - mirrorIndex_ for symmetrical pair offsets + * + * Single-pointer accessors (getNeighborsPtr, etc.) help in HPC loops. + * Typically fill these arrays via a separate "NeighborCalc". + */ + +namespace tadah { +namespace mlip { + +/** + * @class NeighborList + * @brief SoA neighbor list container with single-pointer interface and optional shifts. + */ +class NeighborList +{ +public: + inline NeighborList() = default; + + inline void resize(std::size_t nAtoms, std::size_t totalNeighborCount) + { + prefixSum_.resize(nAtoms + 1); + neighbors_.resize(totalNeighborCount); + shiftX_.resize(totalNeighborCount); + shiftY_.resize(totalNeighborCount); + shiftZ_.resize(totalNeighborCount); + mirrorIndex_.resize(totalNeighborCount); + } + + inline std::size_t numAtoms() const + { + if (prefixSum_.empty()) { + return 0; + } + return prefixSum_.size() - 1; + } + + inline std::size_t totalNeighborCount() const + { + return neighbors_.size(); + } + + inline std::size_t numNeighbors(std::size_t i) const + { + if (i >= numAtoms()) { + return 0; + } + return prefixSum_[i + 1] - prefixSum_[i]; + } + + inline const std::size_t* getNeighborsPtr(std::size_t i) const + { + if (i >= numAtoms()) { + return nullptr; + } + return &neighbors_[ prefixSum_[i] ]; + } + + inline const int* getShiftXPtr(std::size_t i) const + { + if (i >= numAtoms()) { + return nullptr; + } + return &shiftX_[ prefixSum_[i] ]; + } + inline const int* getShiftYPtr(std::size_t i) const + { + if (i >= numAtoms()) { + return nullptr; + } + return &shiftY_[ prefixSum_[i] ]; + } + inline const int* getShiftZPtr(std::size_t i) const + { + if (i >= numAtoms()) { + return nullptr; + } + return &shiftZ_[ prefixSum_[i] ]; + } + + inline const std::size_t* getMirrorIndexPtr(std::size_t i) const + { + if (i >= numAtoms()) { + return nullptr; + } + return &mirrorIndex_[ prefixSum_[i] ]; + } + + inline std::size_t getMirrorOffset(std::size_t i, std::size_t k) const + { + return mirrorIndex_[ prefixSum_[i] + k ]; + } + + inline std::size_t* prefixData() { return prefixSum_.data(); } + inline std::size_t* neighborData() { return neighbors_.data(); } + inline int* shiftXData() { return shiftX_.data(); } + inline int* shiftYData() { return shiftY_.data(); } + inline int* shiftZData() { return shiftZ_.data(); } + inline std::size_t* mirrorIndexData() { return mirrorIndex_.data(); } + + inline const std::size_t* prefixData() const { return prefixSum_.data(); } + inline const std::size_t* neighborData() const { return neighbors_.data(); } + inline const int* shiftXData() const { return shiftX_.data(); } + inline const int* shiftYData() const { return shiftY_.data(); } + inline const int* shiftZData() const { return shiftZ_.data(); } + inline const std::size_t* mirrorIndexData() const { return mirrorIndex_.data(); } + +private: + std::vector<std::size_t> prefixSum_; + std::vector<std::size_t> neighbors_; + std::vector<int> shiftX_; + std::vector<int> shiftY_; + std::vector<int> shiftZ_; + std::vector<std::size_t> mirrorIndex_; +}; + +} // namespace mlip +} // namespace tadah + diff --git a/include/tadah/mlip/neighbor_list_db.h b/include/tadah/mlip/neighbor_list_db.h new file mode 100644 index 0000000000000000000000000000000000000000..1143607341788e9031f0e3666711eed406ce3495 --- /dev/null +++ b/include/tadah/mlip/neighbor_list_db.h @@ -0,0 +1,118 @@ +#pragma once + +#include <cstddef> +#include <stdexcept> + +#include "neighbor_list.h" +#include "structure_db.h" + +namespace tadah { +namespace mlip { + +/** + * @class NeighborListDB + * @brief Holds a single global NeighborList for an entire StructureDB, along with + * convenience methods for mapping (structureIndex, localAtomIndex). + * + * This design: + * - References a StructureDB for HPC SoA (X_, Y_, Z_, etc.). + * - Stores one NeighborList covering atom indices [0..(N-1)] for all structures. + * - Expects an external "NeighborCalc" or code to fill nlist_ from the SoA data. + */ +class NeighborListDB +{ +public: + /** + * @brief Binds this object to a const StructureDB reference. The DB must outlive this object. + */ + explicit NeighborListDB(const StructureDB &db) + : db_(db) + {} + + /** + * @brief Provides mutable access to the HPC neighbor list, allowing + * external building or updates. + */ + inline NeighborList& nlist() { + return nlist_; + } + + /** + * @brief Provides read-only access to the HPC neighbor list. + */ + inline const NeighborList& nlist() const { + return nlist_; + } + + /** + * @brief Converts (structureIndex, localAtomIndex) to a "global atom index" used by the HPC lists. + */ + inline std::size_t toGlobalAtomIndex(std::size_t structIndex, + std::size_t localAtomIndex) const + { + if (structIndex >= db_.size()) { + throw std::out_of_range("NeighborListDB::toGlobalAtomIndex: structIndex out of range"); + } + const Structure &sRef = db_(structIndex); + if (localAtomIndex >= sRef.natoms()) { + throw std::out_of_range("NeighborListDB::toGlobalAtomIndex: localAtomIndex out of range"); + } + return sRef.offset_ + localAtomIndex; + } + + /** + * @brief Returns a pointer to the neighbor array for (structureIndex, localAtomIndex). + */ + inline const std::size_t* getNeighborsPtr(std::size_t structIndex, + std::size_t localAtomIndex) const + { + std::size_t gIdx = toGlobalAtomIndex(structIndex, localAtomIndex); + return nlist_.getNeighborsPtr(gIdx); + } + + /** + * @brief Returns how many neighbors exist for (structureIndex, localAtomIndex). + */ + inline std::size_t numNeighbors(std::size_t structIndex, + std::size_t localAtomIndex) const + { + std::size_t gIdx = toGlobalAtomIndex(structIndex, localAtomIndex); + return nlist_.numNeighbors(gIdx); + } + + /** + * @brief Returns a pointer to the mirror offset array for symmetrical pairs, + * mapped from (structureIndex, localAtomIndex). + */ + inline const std::size_t* getMirrorIndexPtr(std::size_t structIndex, + std::size_t localAtomIndex) const + { + std::size_t gIdx = toGlobalAtomIndex(structIndex, localAtomIndex); + return nlist_.getMirrorIndexPtr(gIdx); + } + + /** + * @brief Returns the shiftXPtr for (structureIndex, localAtomIndex), if needed. + */ + inline const int* getShiftXPtr(std::size_t structIndex, + std::size_t localAtomIndex) const + { + std::size_t gIdx = toGlobalAtomIndex(structIndex, localAtomIndex); + return nlist_.getShiftXPtr(gIdx); + } + +private: + /** + * @brief A read-only reference to the HPC structure database. + */ + const StructureDB &db_; + + /** + * @brief A single HPC neighbor list representing all atoms in db_. + */ + NeighborList nlist_; +}; + +} // namespace mlip +} // namespace tadah + diff --git a/include/tadah/mlip/nn_finder.h b/include/tadah/mlip/nn_finder.h index 2593334383f35c22037303bbf6784879bdd6e1b3..561d267fb6fd564615b901181386390d74f5878f 100644 --- a/include/tadah/mlip/nn_finder.h +++ b/include/tadah/mlip/nn_finder.h @@ -6,6 +6,8 @@ #include <tadah/mlip/structure.h> #include <tadah/mlip/structure_db.h> +namespace tadah { +namespace mlip { /** * @class NNFinder * @@ -156,6 +158,8 @@ public: */ void calc(StructureDB &stdb); }; +} +} #endif // NN_FINDER_H diff --git a/include/tadah/mlip/normaliser.h b/include/tadah/mlip/normaliser.h index 72e8be17b58cba85a1761360313df9da5fc454b4..f78a1c7a0084df1ae13defdce925042a623f8143 100644 --- a/include/tadah/mlip/normaliser.h +++ b/include/tadah/mlip/normaliser.h @@ -9,6 +9,8 @@ #include <limits> #include <stdexcept> +namespace tadah { +namespace mlip { class Normaliser: public Normaliser_Core { public: using Normaliser_Core::normalise; @@ -95,4 +97,6 @@ class Normaliser: public Normaliser_Core { } }; +} +} #endif diff --git a/include/tadah/mlip/st_descriptors.h b/include/tadah/mlip/st_descriptors.h index f5cf2ad234a067afafb78c18c02427cfe3e0cee9..a3206efdd3300dcc5f37df1186cb0ac627eddfb8 100644 --- a/include/tadah/mlip/st_descriptors.h +++ b/include/tadah/mlip/st_descriptors.h @@ -7,6 +7,8 @@ #include <vector> +namespace tadah { +namespace mlip { /** \brief Container for a structure descriptors. * @@ -77,4 +79,6 @@ struct StDescriptors { size_t dim() const; }; +} +} #endif diff --git a/include/tadah/mlip/st_descriptors_db.h b/include/tadah/mlip/st_descriptors_db.h index 6272db8c0b1dab60451dae98beaa6ec42c9d85a6..d9aa5ed612d084fb91f1e20ceb4b19ac43583677 100644 --- a/include/tadah/mlip/st_descriptors_db.h +++ b/include/tadah/mlip/st_descriptors_db.h @@ -5,6 +5,8 @@ #include <tadah/mlip/st_descriptors.h> #include <tadah/mlip/structure_db.h> +namespace tadah { +namespace mlip { /** \brief Container for StDescriptors. * @@ -45,4 +47,6 @@ struct StDescriptorsDB { std::vector<StDescriptors>::const_iterator end() const; }; +} +} #endif diff --git a/include/tadah/mlip/structure.h b/include/tadah/mlip/structure.h index 9ab8d6af1eee4b6bc76693b41aee2a5c3b220e9e..b47cfa7acac39eb0a724019eef06797010e23669 100644 --- a/include/tadah/mlip/structure.h +++ b/include/tadah/mlip/structure.h @@ -1,317 +1,199 @@ -#ifndef STRUCTURE_h -#define STRUCTURE_h - -#include <tadah/mlip/atom.h> -#include <tadah/core/core_types.h> -#include <tadah/core/periodic_table.h> +#pragma once +#ifndef TADAH_MLIP_STRUCTURE_H +#define TADAH_MLIP_STRUCTURE_H +#include <cstddef> #include <string> -#include <iostream> #include <fstream> -#include <iomanip> -#include <vector> -#include <set> +#include <stdexcept> +#include <iterator> + +// HPC types (Matrix3d, stress_type) come from here: +#include <tadah/core/periodic_table.h> +#include <tadah/core/core_types.h> +#include <tadah/core/vec3d_soa_view.h> + +// Forward-declare classes to avoid circular includes +namespace tadah { +namespace mlip { +class Atom; // Defined in "atom.h" +class StructureDB; // Defined in "structure_db.h" +} // namespace mlip +} // namespace tadah + +namespace tadah { +namespace mlip { /** - * Container for a collection of Atom(s). - * - * Holds list of Atom(s), cell dimensions, system energy and stress tensor. - * - * Pass this object to NNFinder to build a list of nearest - * neighbour positions for every Atom in the list. - * - * Usage example: - * - * # Default constructor creates an empty Structure object. - * # All attributes are left uninitialised - * Structure st; - * - * Usage example: - * - * # Print object summary with streams: - * std::cout << st; - * - * # Print 2nd atom data - * std::cout << st(2) - * - * # Print stress tensor - * std::cout << st.stress - * - * @see Atom StructureDB NNFinder + * @brief Manages a subrange of SoA arrays used for storing atoms. */ -struct Structure { - - /** Create an empty Structure object. - * - * All class attributes are left uninitialised. - */ +class Structure { +public: + // -------------------------------------------------- + // Constructors and main fields + // -------------------------------------------------- Structure(); + Structure(StructureDB* db, size_t offset, size_t capacity); - /** Text label for this structure. */ std::string label; + double energy = 0.0; + Matrix3d cell; + stress_type stress; + double T = 0.0; + double eweight = 1.0, fweight = 1.0, sweight = 1.0; + + // Tracks how many atoms are currently stored in this structure’s subrange + size_t natoms() const { return size_; } + + // -------------------------------------------------- + // Access to individual Atom + // -------------------------------------------------- + Atom operator()(size_t i); + const Atom operator()(size_t i) const; + + // Adds an atom to the SoA subrange + void addAtom(int Z, double xVal, double yVal, double zVal, + double fxVal, double fyVal, double fzVal); + + // SoA references for HPC (non-const) + double& x(size_t i); + double& y(size_t i); + double& z(size_t i); + double& fx(size_t i); + double& fy(size_t i); + double& fz(size_t i); + int& atomicNumber(size_t i); + + // SoA references for HPC (const) + const double& x(size_t i) const; + const double& y(size_t i) const; + const double& z(size_t i) const; + const double& fx(size_t i) const; + const double& fy(size_t i) const; + const double& fz(size_t i) const; + const int& atomicNumber(size_t i) const; + + // SoA views for HPC (non-const) + Vec3dSoAView positionView(size_t i); + Vec3dSoAView forceView(size_t i); + + // -------------------------------------------------- + // Minimal I/O + // -------------------------------------------------- + int read(std::ifstream &ifs); + void save(std::ofstream &ofs) const; - /** Total energy of this structure */ - double energy; + // -------------------------------------------------- + // Forward iterator for range-based for + // -------------------------------------------------- + class iterator { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = Atom; + using difference_type = std::ptrdiff_t; + using pointer = Atom*; + using reference = Atom; + + iterator(Structure* structure = nullptr, size_t index = 0) + : structure_(structure), index_(index) + {} + + // Declaration only; definition is in structure_inl.h + reference operator*() const; + + // Pre-increment + iterator& operator++() { + ++index_; + return *this; + } - /** Lattice vectors. */ - Matrix3d cell; + // Post-increment + iterator operator++(int) { + iterator tmp(*this); + ++(*this); + return tmp; + } - /** Virial stress tensor. */ - stress_type stress; + bool operator==(const iterator &other) const { + return (structure_ == other.structure_) && (index_ == other.index_); + } + bool operator!=(const iterator &other) const { + return !(*this == other); + } + + private: + Structure* structure_ = nullptr; + size_t index_ = 0; + }; + + // Similarly, a const_iterator for read-only usage + class const_iterator { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = Atom; + using difference_type = std::ptrdiff_t; + using pointer = const Atom*; + using reference = const Atom; + + const_iterator(const Structure* structure = nullptr, size_t index = 0) + : structure_(structure), index_(index) + {} + + // Declaration only; definition is in structure_inl.h + reference operator*() const; + + // Pre-increment + const_iterator& operator++() { + ++index_; + return *this; + } - /** Container for atoms which belong to this structure */ - std::vector<Atom> atoms; - - /** Temperature of this structure. - * - * Default is 0.0 - */ - double T=0; - - - /** Periodic image flag for neigherest neighbours. - * - * Indicate which periodic image the neigbouring atom belongs to. - * - * Example in two dimensions for illustration purposes: - * - * -1, 1 | 0, 1 | 1, 1 - * ------------------- - * -1, 0 | 0, 0 | 1, 0 - * ------------------- - * -1,-1 | 0,-1 | 1,-1 - * - * So [0,0] corresponds to the atom in the main box and [1,0] to periodic - * shift in the x-direction. Obviously we use three dimensions so this - * corresponds to [0,0,0] and [1,0,0]. - * - * Note that there might be more than one periodic image in each direction. - * This depends on the box size and the cutoff distance used. - */ - std::vector<std::vector<Vec3d>> near_neigh_shift; - - /** List of global indices for nearest neighbours for every atom */ - std::vector<std::vector<size_t>> near_neigh_idx; - - /** Weighting parameter for energy used during training. - * - * Default is 1.0 - */ - double eweight=1.0; - - /** Weighting parameter for forces used during training. - * - * Default is 1.0 - */ - double fweight=1.0; - - /** Weighting parameter for stress used during training. - * - * Default is 1.0 - */ - double sweight=1.0; - - - /** @return a reference to the i-th Atom. - * - * Usage example: - * - * # Get a reference to the 3rd Atom in the st Structure - * const Atom &atom = st(2); - */ - const Atom& operator()(const size_t i) const; - - /** Add Atom a to this structure. */ - void add_atom(Atom a); - - /** Remove an Atom in the i-th position from this structure. */ - void remove_atom(const size_t i); - - /** @return a number of atoms in this structure. */ - size_t natoms() const; + // Post-increment + const_iterator operator++(int) { + const_iterator tmp(*this); + ++(*this); + return tmp; + } + + bool operator==(const const_iterator &other) const { + return (structure_ == other.structure_) && (index_ == other.index_); + } + bool operator!=(const const_iterator &other) const { + return !(*this == other); + } + + private: + const Structure* structure_ = nullptr; + size_t index_ = 0; + }; + + inline iterator begin() { return iterator(this, 0); } + inline iterator end() { return iterator(this, size_); } + + inline const_iterator begin() const { return const_iterator(this, 0); } + inline const_iterator end() const { return const_iterator(this, size_); } /** @return volume of this structure. */ - double get_volume() const; + double get_volume() const { + return cell.row(0)*(cell.row(1).cross(cell.row(2))); + } /** @return density of this structure in g/cm^3 */ double get_density() const; /** @return temperature of this structure in K */ - double get_temperature() const; - - /** @return virial pressure calculated from the stress tensor. - * - * Units: energy/distance^3 - */ - double get_virial_pressure() const; - - /** @return pressure for a given temperature. - * - * The calculated pressure contains both ideal gas - * and virial pressures. - * - * Default units Ev/Angstrom^3. - * - * @param kB Boltzmann Constant. Default in ev/K. - * - * For T=0 returns virial pressure only. - */ - double get_pressure(const double T, const double kB=8.617333262145e-5) const; - -/** - * Return the position of the n-th neighbor of atom i, - * computed via the neighbor's global index and periodic shifts. - * - * Assumes: - * near_neigh_idx[i][n] -> global index of neighbor - * near_neigh_shift[i][n] -> integer triple (n1, n2, n3) OR real shift - * - * Returns by value (Vec3d) so we don't reference a temporary. - */ - Vec3d nn_pos(const size_t i, const size_t n) const; - - /** @return a number of nearest neighbours of the i-th Atom. */ - size_t nn_size(const size_t i) const; - - /** Read structure from the stream. - * - * If a stream contains more than one structure, - * the first structure will be read. - * - * @returns 0 if success, 1 otherwise - */ - int read(std::ifstream &ifs); - - /** Read a structure from the file. - * - * If a file contains more than one structure, - * the first structure will be read. - */ - void read(const std::string fn); - - /** Save structure to the stream. */ - void save(std::ofstream &ofs) const; + double get_temperature() const { return T; } + +private: + friend class StructureDB; + StructureDB* db_ = nullptr; + size_t offset_ = 0; + size_t size_ = 0; + size_t capacity_ = 0; +}; - /** Save structure to the file. - * - * @warning - * This will overwrite any existing data in the fn file. - */ - void save(const std::string fn) const; - - /** Return corresponding ii index of i-j (jj) atom interaction. - * - * The ii index is used with nearest neighour lists. - * i,j are atom global indices; ii,jj are neighbour lists indices. - * - * This method is required to fully specify interacton of two atoms - * when predicting force on the i-th atom. In particular in the case when - * atom i-j interaction is specified more than once. e.g. one i-j interaction - * in the main box and another one coming from the periodic image. - * The critical condition for this to happen for the square box is: - * rcut > box length / 2. - * - * e.g. - * To get a force on atom i. We loop over nn of i to obtain jj indices. - * For many-body potentials we also need to know the density for atom at jj, - * hence we need j index. This is stored by: - * - * j=near_neigh_idx[a][jj] - * - * Moreover we need to know derivatives (descriptor derivatives wrt to rij) - * at both i and j atoms. However if jj atom is outside of the main box - * we do not calculate derivative for this atom but we can use derivative - * for the j-ii interaction. Hence we need to find ii index here... - * - * - * ii = get_nn_iindex(i,j,jj) - * - * phew... - */ - size_t get_nn_iindex(const size_t i, const size_t j, const size_t jj) const; - - /** Print structure summary using streams. */ - friend std::ostream& operator<<(std::ostream& os, const Structure& st) - { - os << "Structure : " << std::left << st.label << std::endl; - os << "Weights : " << std::left - << "E: "<< st.eweight - << " | F: " << std::left << st.fweight - << " | S: " << std::left << st.sweight << std::endl; - os << "# atoms : " << std::left << st.atoms.size() << std::endl; - os << "Energy :" << std::right << std::setw(w) - << std::setprecision(p) << std::fixed << st.energy << std::endl; - os << "Cell :" << std::endl; - for (int i=0; i<3; ++i) { - os << std::right << std::fixed << std::setw(w+10) - << std::setprecision(p) << st.cell(i,0); - os << std::right << std::fixed << std::setw(w) - << std::setprecision(p) << st.cell(i,1); - os << std::right << std::fixed << std::setw(w) - << std::setprecision(p) << st.cell(i,2); - os << std::endl; - } - os << "Stress :" << std::endl; - for (int i=0; i<3; ++i) { - os << std::right << std::fixed << std::setw(w+10) - << std::setprecision(p) << st.stress(i,0); - os << std::right << std::fixed << std::setw(w) - << std::setprecision(p) << st.stress(i,1); - os << std::right << std::fixed << std::setw(w) - << std::setprecision(p) << st.stress(i,2); - os << std::endl; - } - return os; - } - /** Return true if both structures are the same. - * - * This operator compares cell, stress tensor, number of atoms, - * eweight, fweight and sweight, all atom positions and forces. - * - * Assumes atoms have the same order. - * - * It does NOT compare labels. - */ - bool operator==(const Structure& st) const; - - /** Return true if both structures are the same. - * - * This method compares cell vectors, number of atoms, - * species and atomic coordinates - * - * Order ot atoms does not matter. - * - * It does NOT compare energies, stresses, forces and labels. - */ - bool is_the_same(const Structure& st, double thr=1e-6) const; - - // move iterator forward to the next structure - // return number of atoms in the structure - // return 0 if there is no more structures - static int next_structure(std::ifstream &ifs); - - /** Clear NN list */ - void clear_nn(); - - // Methods to enable range-based for loop - std::vector<Atom>::iterator begin(); - std::vector<Atom>::iterator end(); - std::vector<Atom>::const_iterator begin() const; - std::vector<Atom>::const_iterator end() const; - - /** Method to dump class content to a file */ - void dump_to_file(std::ostream &file, size_t prec=12) const; - - /** Method to dump class content to a file */ - void dump_to_file(const std::string& filepath, size_t prec=12) const; - - /** List of chemical elements in this structure. */ - std::set<Element> get_unique_elements() const; +} // namespace mlip +} // namespace tadah - private: - const static size_t w=15; // controls output width - const static size_t p=6; // controls output precision - +#endif // TADAH_MLIP_STRUCTURE_H -}; -#endif diff --git a/include/tadah/mlip/structure_db.h b/include/tadah/mlip/structure_db.h index 0cab4a666f1a656d54b9e0578c8685a86c149933..a0bc3443cb14b5e467ed64e5a77f4016c6ce1be5 100644 --- a/include/tadah/mlip/structure_db.h +++ b/include/tadah/mlip/structure_db.h @@ -1,183 +1,157 @@ -#ifndef STRUCTURE_DB_h -#define STRUCTURE_DB_h - -#include "tadah/core/utils.h" -#include <tadah/core/element.h> -#include <tadah/core/config.h> -#include <tadah/mlip/structure.h> +#pragma once +#ifndef TADAH_MLIP_STRUCTURE_DB_H +#define TADAH_MLIP_STRUCTURE_DB_H +#include <vector> #include <string> -#include <iostream> #include <fstream> -#include <vector> -#include <set> +#include <iostream> +#include <sstream> +#include <cctype> +#include <stdexcept> + +#ifdef _OPENMP +#include <omp.h> // Optional if parallelizing +#endif + +#include <tadah/core/config.h> // tadah::core::Config +#include <tadah/mlip/structure.h> // tadah::mlip::Structure + +namespace tadah { +namespace mlip { /** - * Container for a collection of Structure(s). - * - * Usage example: - * - * # Default constructor creates an empty StructureDB object. - * StructureDB stdb; - * # Add structures from the file "db.dat" - * stdb.add("db.dat"); - * - * Usage example: + * @brief Holds a set of structures loaded from multiple files, using a two-pass approach. * - * # Load all structure files listed in a Config file - * Config config("config"); - * StructureDB stdb(config); - * - * Usage example: - * - * # Print this object summary using streams: - * std::cout << stdb; - * - * # Print 3rd Atom data from the 1st Structure - * std::cout << stdb(0,2); - * - * @see Structure NNFinder + * The approach: + * 1) Parse how many structures and how many atoms each structure has (pass1). + * 2) Allocate SoA arrays for HPC storage (X_,Y_,Z_, etc.). + * 3) Re-read each file to fill data for each structure (pass2). * + * Each file can contain multiple structures separated by blank lines. + * Each structure has a 9-line header plus N atom lines. The HPC arrays store + * positions, forces, and atomic numbers in contiguous format. */ -struct StructureDB { - std::vector<Structure> structures; - - /** Create an empty StructureDB object. */ - StructureDB(); +class StructureDB { +public: - /** Create this object and load structures - * listed in the config file + /** + * @brief Stores pass1 results such as how many structures are in each file, + * how many atoms each structure contains, and which files are included. * - * \note - * Required Config key: \ref DBFILE + * Permits loading a certain subset of structures from large input files by + * specifying which ranges are included. */ - StructureDB(Config &config); + struct Pass1Info { + Pass1Info() = default; + Pass1Info(const std::vector<std::string> &files, + const std::vector<std::size_t> &nStructuresPerFile, + const std::vector<std::vector<std::size_t>> &atomsPerStructurePerFile) + : files(files), nStructuresPerFile(nStructuresPerFile), atomsPerStructurePerFile(atomsPerStructurePerFile) + { + firstStructureIndexPerFile.resize(files.size(), 0); + } + Pass1Info(const std::vector<std::string> &files, + const std::vector<std::size_t> &nStructuresPerFile, + const std::vector<std::vector<std::size_t>> &atomsPerStructurePerFile, + const std::vector<std::size_t> &firstStructureIndexPerFile): + files(files), nStructuresPerFile(nStructuresPerFile), + atomsPerStructurePerFile(atomsPerStructurePerFile), + firstStructureIndexPerFile(firstStructureIndexPerFile) {} + + std::vector<std::string> files; + std::vector<std::size_t> nStructuresPerFile; + std::vector<std::size_t> firstStructureIndexPerFile; + std::vector<std::vector<std::size_t>> atomsPerStructurePerFile; + }; - /** Add structures listed in the config file - * - * \note - * Required Config key: \ref DBFILE - * +private: + /** + * @brief HEADERSIZE_ references the 9 lines forming a header in each structure file block. */ - void add(Config &config); - - /** Add all structures from a file */ - void add(const std::string fn); + static constexpr std::size_t HEADERSIZE_ = 9; - /** Add N structures from a file begining from first index. - * - * Indexing starts from zero, i.e. - * first=0 corresponds to the first structure in the file. - * - * In the case of N greater than the number of available - * structures, this method will load only the available ones. - * - * Return the number of loaded structures. + /** + * @brief BUFSIZE_ sets a 100MB read buffer to accelerate file I/O for large data. */ - int add(const std::string fn, size_t first, int N); + static constexpr std::size_t BUFSIZE_ = (100ULL << 20); - /** Add a single Structure object to this container */ - void add(const Structure &s); - - /** Add all structure from other Structure object to this container */ - void add(const StructureDB &stdb); - - /** remove i-th Structure object from this container */ - void remove(size_t i); - - /** \return number of structures held by this object */ - size_t size() const; - - /** \return number of structures in the n-th DBFILE - * - * n={0,...,number of DBFILEs-1} + /** + * @brief TANTALUM_Z_ used as a dummy atomic number for "Ta" examples. */ - size_t size(size_t n) const; + static constexpr int TANTALUM_Z_ = 73; - /** \return reference to the s-th structure - * - * Usage example: - * - * # Get reference to the 2nd structure held be this object - * # and bind it to Structure st. - * Structure &st = st(1); - */ - Structure& operator()(size_t s); - const Structure& operator()(size_t s) const; +public: + std::vector<double> X_, Y_, Z_; + std::vector<double> FX_, FY_, FZ_; + std::vector<int> atomicNumber_; - /** \return reference to the a-th atom in the s-th structure - * - * Usage example: - * - * # Get reference to the 5th atom in the 3rd structure - * # held by this object and bind it to the atom object. - * Atom &atom = st(2,4); - */ - Atom& operator()(size_t s, size_t a); +private: + std::vector<Structure> structures_; + std::size_t usedAtoms_ = 0; - /** Print this object summary to the stream */ - friend std::ostream& operator<<(std::ostream& os, const StructureDB& stdb) - { - os << stdb.summary(); - return os; - } - std::string summary() const; + std::vector<std::vector<std::size_t>> structureAtomCountsPerFile_; + std::vector<std::size_t> fileStructureOffset_; - /** Store indices for each dataset. - * - * e.g. if 3 datasets of sizes 11,13,15 - * dbidx=={0,11,24,39} +public: + /** + * @brief Constructs by reading config for DB files, performs two-pass fill (if desired). */ - std::vector<size_t> dbidx; - - /** Calculate total number of atoms stored by this object. */ - size_t calc_natoms() const; + inline explicit StructureDB(Config &config); - /** Calculate total number of atoms in the n-th DBFILE. - * - * n={0,...,number of DBFILEs-1} + /** + * @brief Constructs from external pass1 info, then allocates HPC arrays accordingly. */ - size_t calc_natoms(size_t n) const; + inline StructureDB(const Pass1Info &pinfo); - /** Return unique elements for all Structures. */ - std::set<Element> get_unique_elements() const; + // Returns total number of structures across all files + inline std::size_t size() const { return structures_.size(); } - /** Find unique elements in provided Config file */ - static std::set<Element> find_unique_elements(const Config &c); + // Returns how many files were processed + inline std::size_t nFiles() const { return structureAtomCountsPerFile_.size(); } - /** Find unique elements in provided file */ - static std::set<Element> find_unique_elements(const std::string &fn); + // Returns how many structures are found in file f + inline std::size_t nStructuresInFile(std::size_t f) const; - /** Count number of structures and atoms in all datasets from the Config file. */ - static std::pair<int,int> count(const Config &c); + // Access to structures by overall index or by (file, structureIndexInFile) + inline Structure& operator()(std::size_t i); + inline const Structure& operator()(std::size_t i) const; - /** Count number of structures and atoms in a single dataset. */ - static std::pair<int,int> count(const std::string fn); + inline Structure& operator()(std::size_t f, std::size_t j); + inline const Structure& operator()(std::size_t f, std::size_t j) const; - void clear_nn(); + // Summary for debug + inline std::string summary() const { + std::ostringstream oss; + oss << "StructureDB: " << size() << " structures total\n"; + return oss.str(); + } - /** Check consistency of the ATOMS key. */ - static void check_atoms_key(Config &config, std::set<Element> &unique_elements); +private: + /** + * @brief Pass1 parses each file quickly to identify how many structures + * and how many lines each structure contains. + */ + inline std::vector<std::size_t> pass1ParseStructures(const std::string &filename) const; - /** Check consistency of the WATOMS key. Add if missing*/ - static void check_watoms_key(Config &config, std::set<Element> &unique_elements); + /** + * @brief Pass2 re-opens each file, skips the 9-line header, and reads + * the atom lines, storing them in HPC SoA arrays. + */ + inline void pass2FillStructures(std::size_t fIndex, + std::size_t offsetS, + const std::string &ifile); - // Methods to enable range-based for loop - std::vector<Structure>::iterator begin(); - std::vector<Structure>::iterator end(); - std::vector<Structure>::const_iterator begin() const; - std::vector<Structure>::const_iterator end() const; + inline bool isBlankLine(const std::string& line) const; - /** Method to dump class content to a file */ - void dump_to_file(const std::string& filepath, size_t prec=12) const; + /** + * @brief Converts a symbol to an atomic number. Returns TANTALUM_Z_ if "Ta," else returns 0. + */ + inline int toAtomicNumber(const std::string &sym) const; +}; - // Public method that reads the file, counts blocks and line counts per block, - // then prints the results to std::cout. - void parseFile(const std::string& filename); +} // namespace mlip +} // namespace tadah -private: - // Checks if a line is empty or contains only whitespace - bool isBlankLine(const std::string& line) const; +#endif // TADAH_MLIP_STRUCTURE_DB_H -}; -#endif diff --git a/include/tadah/mlip/structure_inl.h b/include/tadah/mlip/structure_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..29e320b293625599c43bdfaeb502a3f72d5d61f6 --- /dev/null +++ b/include/tadah/mlip/structure_inl.h @@ -0,0 +1,180 @@ +#pragma once +#ifndef TADAH_MLIP_STRUCTURE_INL_H +#define TADAH_MLIP_STRUCTURE_INL_H + +#include <tadah/mlip/structure.h> +#include <tadah/mlip/atom.h> +#include <tadah/mlip/structure_db.h> +#include <tadah/core/vec3d_soa_view.h> +#include <sstream> + +namespace tadah { +namespace mlip { + +// Inline definitions of Structure methods that reference Atom or SoA: + +inline Structure::Structure() + : db_(nullptr), offset_(0), size_(0), capacity_(0) +{} + +inline Structure::Structure(StructureDB* db, size_t offset, size_t capacity) + : db_(db), offset_(offset), size_(0), capacity_(capacity) +{} + +inline Atom Structure::operator()(size_t i) { + return Atom(this, i); +} +inline const Atom Structure::operator()(size_t i) const { + // Const-cast is used so Atom can reference internal data. + // The handle itself does not permit non-const modifications unless + // the structure is also non-const. + return Atom(const_cast<Structure*>(this), i); +} + +inline void Structure::addAtom(int Z, double xVal, double yVal, double zVal, + double fxVal, double fyVal, double fzVal) +{ + if (size_ >= capacity_) { + throw std::runtime_error("Structure::addAtom: capacity exceeded"); + } + size_t idx = size_; + x(idx) = xVal; + y(idx) = yVal; + z(idx) = zVal; + fx(idx) = fxVal; + fy(idx) = fyVal; + fz(idx) = fzVal; + atomicNumber(idx) = Z; + size_++; +} + +// SoA references (non-const) +inline double& Structure::x(size_t i) { return db_->X_[offset_ + i]; } +inline double& Structure::y(size_t i) { return db_->Y_[offset_ + i]; } +inline double& Structure::z(size_t i) { return db_->Z_[offset_ + i]; } +inline double& Structure::fx(size_t i) { return db_->FX_[offset_ + i]; } +inline double& Structure::fy(size_t i) { return db_->FY_[offset_ + i]; } +inline double& Structure::fz(size_t i) { return db_->FZ_[offset_ + i]; } +inline int& Structure::atomicNumber(size_t i) { return db_->atomicNumber_[offset_ + i]; } + +// SoA references (const) +inline const double& Structure::x(size_t i) const { return db_->X_[offset_ + i]; } +inline const double& Structure::y(size_t i) const { return db_->Y_[offset_ + i]; } +inline const double& Structure::z(size_t i) const { return db_->Z_[offset_ + i]; } +inline const double& Structure::fx(size_t i) const { return db_->FX_[offset_ + i]; } +inline const double& Structure::fy(size_t i) const { return db_->FY_[offset_ + i]; } +inline const double& Structure::fz(size_t i) const { return db_->FZ_[offset_ + i]; } +inline const int& Structure::atomicNumber(size_t i) const { return db_->atomicNumber_[offset_ + i]; } + +inline Vec3dSoAView Structure::positionView(size_t i) +{ + return Vec3dSoAView( x(i), y(i), z(i) ); +} +inline Vec3dSoAView Structure::forceView(size_t i) +{ + return Vec3dSoAView( fx(i), fy(i), fz(i) ); +} + +inline int Structure::read(std::ifstream &ifs) +{ + if (!ifs.good()) return 1; + std::string line; + std::getline(ifs, line); + if (line.empty()) return 1; + + label = line; // Reads label from first line + + // Next line includes energy and T + { + std::getline(ifs, line); + if (line.empty()) return 1; + std::stringstream ss(line); + ss >> energy >> T; + } + + // Reads cell from next 3 lines + for (int i=0; i<3; ++i){ + for (int j=0; j<3; ++j){ + ifs >> cell(i,j); + } + } + + // Reads stress matrix from next 3 lines + for (int i=0; i<3; ++i){ + for (int j=0; j<3; ++j){ + ifs >> stress(i,j); + } + } + // flush line + std::getline(ifs, line); + + // Parse atoms + while (true) { + std::streampos pos = ifs.tellg(); + if (!std::getline(ifs, line)) break; + if (line.empty()) break; + std::stringstream tmp(line); + int zVal; double xx,yy,zz, fxx,fyy,fzz; + tmp >> zVal >> xx >> yy >> zz >> fxx >> fyy >> fzz; + if (tmp.fail()) { + // revert + ifs.seekg(pos); + break; + } + addAtom(zVal, xx, yy, zz, fxx, fyy, fzz); + } + return 0; +} + +inline void Structure::save(std::ofstream &ofs) const +{ + ofs << label << "\n"; + ofs << energy << " " << T << "\n"; + // Writes cell + for (int i=0; i<3; ++i) { + for (int j=0; j<3; ++j) { + ofs << cell(i,j) << " "; + } + ofs << "\n"; + } + // Writes stress + for (int i=0; i<3; ++i) { + for (int j=0; j<3; ++j) { + ofs << stress(i,j) << " "; + } + ofs << "\n"; + } + // Writes atom data + for (size_t i=0; i<size_; i++) { + ofs << atomicNumber(i) << " " + << x(i) << " " << y(i) << " " << z(i) << " " + << fx(i) << " " << fy(i) << " " << fz(i) << "\n"; + } + ofs << "\n"; +} + +// Iterator stuff for range-based for +inline Structure::iterator::reference +Structure::iterator::operator*() const { + return (*structure_)(index_); +} + +inline Structure::const_iterator::reference +Structure::const_iterator::operator*() const { + return (*structure_)(index_); +} + +inline double Structure::get_density() const { + double V = get_volume(); + V*=1e-24; // convert to cm^3 + double amu = 1.66053906660e-24; // grams + double mass = 0; + for (const auto& a:*this) mass += PeriodicTable::get_mass(a.atomicNumber()); + return amu*mass/V; +} + +} // namespace mlip +} // namespace tadah + +#endif // TADAH_MLIP_STRUCTURE_INL_H + diff --git a/include/tadah/mlip/trainer.h b/include/tadah/mlip/trainer.h index 9bbf598130b961ffbbf1ad5e82a3dceba24eb985..ef20679ba3a99c4c9bf1a8897659379a80d9ea8a 100644 --- a/include/tadah/mlip/trainer.h +++ b/include/tadah/mlip/trainer.h @@ -13,6 +13,8 @@ #include <iostream> +namespace tadah { +namespace mlip { class Trainer { public: Config config; @@ -630,5 +632,7 @@ class TrainerWorker: public MPI_Trainer { } } }; +} +} #endif #endif diff --git a/src/analytics.cpp b/src/analytics.cpp index 57d1dca8ea035056caf01c8aa65f34a3292b920a..d37ac2458feec8b7931968dc28dcadffd8064ed7 100644 --- a/src/analytics.cpp +++ b/src/analytics.cpp @@ -1,247 +1,345 @@ +#include <tadah/mlip/atom.h> #include <tadah/mlip/analytics/analytics.h> #include <tadah/mlip/analytics/statistics.h> +#include <cmath> // for std::abs, std::sqrt, std::pow +#include <stdexcept> -Analytics::Analytics(const StructureDB &st, const StructureDB &stp): - st(st), +namespace tadah { +namespace mlip { + +Analytics::Analytics(const StructureDB &st, const StructureDB &stp) + : st(st), stp(stp) { - if (st.size()!=stp.size()) + if (st.size() != stp.size()) { throw std::runtime_error("Containers differ in size."); + } } -t_type Analytics::calc_e_mae() const{ - - t_type emae_vec(st.dbidx.size()-1); - double emae=0; - size_t dbidx=0; - size_t N=0; - for (size_t i=0; i<st.size(); ++i) { - // assume stp(i).natoms()==st(i).natoms() - emae += std::abs(st(i).energy - stp(i).energy)/st(i).natoms(); - N++; - if (i+1==st.dbidx[dbidx+1]) { - emae_vec(dbidx)=emae/N; - emae=0; - N=0; - dbidx++; +/** + * @brief Compute per-file MAE of energy (in eV/atom). + * Uses the new approach that sums over all structures in each file f. + */ +t_type Analytics::calc_e_mae() const +{ + // We return a t_type with length st.nFiles(), + // each element is the MAE for one file. + t_type emae_vec(st.nFiles()); + + for (size_t f = 0; f < st.nFiles(); ++f) + { + double emae = 0.0; + size_t N = 0; + size_t nStructs = st.nStructuresInFile(f); + + for (size_t i = 0; i < nStructs; ++i) { + // st(f,i) and stp(f,i) must have the same #atoms + double e1 = st(f,i).energy; + double e2 = stp(f,i).energy; + size_t na = st(f,i).natoms(); + if (na == 0) continue; // avoid divide-by-zero + + emae += std::abs(e1 - e2) / na; + N++; } + // average over the structures in file f + if (N > 0) emae /= N; + emae_vec(f) = emae; } return emae_vec; } -t_type Analytics::calc_f_mae() const{ +/** + * @brief Compute per-file MAE of forces (in eV/Å). + */ +t_type Analytics::calc_f_mae() const +{ + t_type fmae_vec(st.nFiles()); + for (size_t f = 0; f < st.nFiles(); ++f) + { + double fmae = 0.0; + size_t N = 0; // total # of force components aggregated + + size_t nStructs = st.nStructuresInFile(f); + for (size_t i = 0; i < nStructs; ++i) { + const auto& s1 = st(f,i); + const auto& s2 = stp(f,i); - t_type fmae_vec(st.dbidx.size()-1); - double fmae=0; - size_t dbidx=0; - size_t N=0; - for (size_t i=0; i<st.size(); ++i) { - // assume stp(i).natoms()==st(i).natoms() + // sum over each atom’s 3 force components + for (size_t a = 0; a < s1.natoms(); ++a) { + double fx1 = s1(a).fx(); + double fy1 = s1(a).fy(); + double fz1 = s1(a).fz(); + double fx2 = s2(a).fx(); + double fy2 = s2(a).fy(); + double fz2 = s2(a).fz(); - for (size_t a=0; a<st(i).natoms(); ++a) { - for (size_t k=0; k<3; ++k) { - fmae += std::abs(st(i).atoms[a].force(k) - stp(i).atoms[a].force(k)); - N++; + fmae += std::abs(fx1 - fx2); + fmae += std::abs(fy1 - fy2); + fmae += std::abs(fz1 - fz2); + N += 3; } } - - if (i+1==st.dbidx[dbidx+1]) { - fmae_vec(dbidx)=fmae/N; - fmae=0; - N=0; - dbidx++; - } + if (N > 0) fmae /= N; + fmae_vec(f) = fmae; } return fmae_vec; } -t_type Analytics::calc_s_mae() const { +/** + * @brief Compute per-file MAE of stress (in eV/ų or whichever units). + * We only collect the symmetric components (x,y) for x <= y. + */ +t_type Analytics::calc_s_mae() const +{ + t_type smae_vec(st.nFiles()); + for (size_t f = 0; f < st.nFiles(); ++f) + { + double smae = 0.0; + size_t N = 0; // number of stress components collected - t_type smae_vec(st.dbidx.size()-1); - double smae=0; - size_t dbidx=0; - size_t N=0; - for (size_t i=0; i<st.size(); ++i) { - for (size_t x=0; x<3; ++x) { - for (size_t y=x; y<3; ++y) { - smae += std::abs(st(i).stress(x,y) - stp(i).stress(x,y)); - N++; + size_t nStructs = st.nStructuresInFile(f); + for (size_t i = 0; i < nStructs; ++i) { + const auto& s1 = st(f,i); + const auto& s2 = stp(f,i); + + for (size_t x = 0; x < 3; ++x) { + for (size_t y = x; y < 3; ++y) { + double val1 = s1.stress(x,y); + double val2 = s2.stress(x,y); + smae += std::abs(val1 - val2); + N++; + } } } - if (i+1==st.dbidx[dbidx+1]) { - smae_vec(dbidx)=smae/N; - smae=0; - N=0; - dbidx++; - } + if (N > 0) smae /= N; + smae_vec(f) = smae; } return smae_vec; } -t_type Analytics::calc_e_rrmse() const { - t_type errmse_vec(st.dbidx.size()-1); - double errmse=0; - size_t dbidx=0; - size_t N=0; - for (size_t i=0; i<st.size(); ++i) { - if (st(i).energy != 0.0) { - errmse += std::pow((st(i).energy - stp(i).energy)/st(i).energy,2); - N++; - } - if (i+1==st.dbidx[dbidx+1]) { - errmse_vec(dbidx)=std::sqrt(errmse/N); - errmse=0; - N=0; - dbidx++; +/** + * @brief Compute per-file relative RMSE of energy: + * sqrt( 1/N * ∑ [ (E1 - E2)/E1 ]² ). + */ +t_type Analytics::calc_e_rrmse() const +{ + t_type errmse_vec(st.nFiles()); + for (size_t f = 0; f < st.nFiles(); ++f) + { + double sumSq = 0.0; // sum of squares + size_t N = 0; + size_t nStructs = st.nStructuresInFile(f); + + for (size_t i = 0; i < nStructs; ++i) { + double Eref = st(f,i).energy; + double Epred = stp(f,i).energy; + + // skip if Eref == 0 to avoid dividing by zero + if (std::abs(Eref) > 1e-14) { + double diffRel = (Eref - Epred)/Eref; + sumSq += diffRel * diffRel; + N++; + } + } + double val = (N > 0) ? std::sqrt(sumSq / N) : 0.0; + errmse_vec(f) = val; } - } - return errmse_vec; + return errmse_vec; } -t_type Analytics::calc_e_rmse() const{ - - t_type ermse_vec(st.dbidx.size()-1); - double ermse=0; - size_t dbidx=0; - size_t N=0; - for (size_t i=0; i<st.size(); ++i) { - // assume stp(i).natoms()==st(i).natoms() - ermse += std::pow((st(i).energy - stp(i).energy)/st(i).natoms(),2); - N++; - if (i+1==st.dbidx[dbidx+1]) { - ermse_vec(dbidx)=std::sqrt(ermse/N); - ermse=0; - N=0; - dbidx++; +/** + * @brief Compute per-file RMSE of energy (in eV/atom). + */ +t_type Analytics::calc_e_rmse() const +{ + t_type ermse_vec(st.nFiles()); + for (size_t f = 0; f < st.nFiles(); ++f) + { + double sumSq = 0.0; + size_t N = 0; + size_t nStructs = st.nStructuresInFile(f); + + for (size_t i = 0; i < nStructs; ++i) { + double Eref = st(f,i).energy; + double Epred = stp(f,i).energy; + size_t na = st(f,i).natoms(); + if (na == 0) continue; + + double diff = (Eref - Epred) / na; + sumSq += diff * diff; + N++; } + double val = (N > 0) ? std::sqrt(sumSq / N) : 0.0; + ermse_vec(f) = val; } return ermse_vec; } -t_type Analytics::calc_f_rmse() const{ +/** + * @brief Compute per-file RMSE of forces (in eV/Å). + */ +t_type Analytics::calc_f_rmse() const +{ + t_type frmse_vec(st.nFiles()); + for (size_t f = 0; f < st.nFiles(); ++f) + { + double sumSq = 0.0; + size_t N = 0; - t_type frmse_vec(st.dbidx.size()-1); - double frmse=0; - size_t dbidx=0; - size_t N=0; - for (size_t i=0; i<st.size(); ++i) { - // assume stp(i).natoms()==st(i).natoms() + size_t nStructs = st.nStructuresInFile(f); + for (size_t i = 0; i < nStructs; ++i) { + const auto & s1 = st(f,i); + const auto & s2 = stp(f,i); - for (size_t a=0; a<st(i).natoms(); ++a) { - for (size_t k=0; k<3; ++k) { - frmse += std::pow(st(i).atoms[a].force(k) - stp(i).atoms[a].force(k),2); - N++; - } - } + for (size_t a = 0; a < s1.natoms(); ++a) { + double fx1 = s1(a).fx(), fx2 = s2(a).fx(); + double fy1 = s1(a).fy(), fy2 = s2(a).fy(); + double fz1 = s1(a).fz(), fz2 = s2(a).fz(); - if (i+1==st.dbidx[dbidx+1]) { - frmse_vec(dbidx)=sqrt(frmse/N); - frmse=0; - N=0; - dbidx++; + sumSq += (fx1 - fx2)*(fx1 - fx2); + sumSq += (fy1 - fy2)*(fy1 - fy2); + sumSq += (fz1 - fz2)*(fz1 - fz2); + N += 3; + } } + if (N > 0) sumSq /= N; + frmse_vec(f) = (N > 0) ? std::sqrt(sumSq) : 0.0; } return frmse_vec; } -t_type Analytics::calc_s_rmse() const { +/** + * @brief Compute per-file RMSE of stress (in eV/Å^3). + */ +t_type Analytics::calc_s_rmse() const +{ + t_type srmse_vec(st.nFiles()); + for (size_t f = 0; f < st.nFiles(); ++f) + { + double sumSq = 0.0; + size_t N = 0; - t_type srmse_vec(st.dbidx.size()-1); - double srmse=0; - size_t dbidx=0; - size_t N=0; - for (size_t i=0; i<st.size(); ++i) { - for (size_t x=0; x<3; ++x) { - for (size_t y=x; y<3; ++y) { - srmse += std::pow(st(i).stress(x,y) - stp(i).stress(x,y),2); - N++; + size_t nStructs = st.nStructuresInFile(f); + for (size_t i = 0; i < nStructs; ++i) { + const auto & s1 = st(f,i); + const auto & s2 = stp(f,i); + + for (size_t x = 0; x < 3; ++x) { + for (size_t y = x; y < 3; ++y) { + double val1 = s1.stress(x,y); + double val2 = s2.stress(x,y); + double diff = val1 - val2; + sumSq += diff * diff; + N++; + } } } - if (i+1==st.dbidx[dbidx+1]) { - srmse_vec(dbidx)=std::sqrt(srmse/N); - srmse=0; - N=0; - dbidx++; - } + if (N > 0) sumSq /= N; + srmse_vec(f) = (N > 0) ? std::sqrt(sumSq) : 0.0; } return srmse_vec; } -t_type Analytics::calc_e_r_sq() const { - t_type e_r_sq_vec(st.dbidx.size()-1); - size_t dbidx=0; - t_type obs(st.size(dbidx)); - t_type pred(st.size(dbidx)); - size_t idx=0; - for (size_t i=0; i<st.size(); ++i) { - obs(idx) = st(i).energy/st(i).natoms(); - pred(idx) = stp(i).energy/st(i).natoms(); - idx++; - if (i+1==st.dbidx[dbidx+1]) { - e_r_sq_vec(dbidx) = Statistics::r_sq(obs,pred); - dbidx++; - if (dbidx<st.dbidx.size()-2) { - obs.resize(st.size(dbidx)); - pred.resize(st.size(dbidx)); - } - idx=0; +/** + * @brief Compute per-file R² of energy (comparing E/atom in each structure). + */ +t_type Analytics::calc_e_r_sq() const +{ + t_type e_r_sq_vec(st.nFiles()); + for (size_t f = 0; f < st.nFiles(); ++f) + { + size_t nStructs = st.nStructuresInFile(f); + // Build vectors of observed and predicted (E/atom) + t_type obs(nStructs), pred(nStructs); + + for (size_t i = 0; i < nStructs; ++i) { + double Eref = st(f,i).energy; + double Epred= stp(f,i).energy; + size_t na = st(f,i).natoms(); + double denom = (na == 0 ? 1.0 : double(na)); + + obs(i) = Eref / denom; + pred(i) = Epred / denom; } + e_r_sq_vec(f) = Statistics::r_sq(obs, pred); } return e_r_sq_vec; } -t_type Analytics::calc_f_r_sq() const { - t_type f_r_sq_vec(st.dbidx.size()-1); - size_t dbidx=0; - t_type obs(3*st.calc_natoms(dbidx)); - t_type pred(3*st.calc_natoms(dbidx)); - size_t idx=0; - for (size_t i=0; i<st.size(); ++i) { - for (size_t a=0; a<st(i).natoms(); ++a) { - for (size_t k=0; k<3; ++k) { - obs(idx) = st(i).atoms[a].force(k); - pred(idx) = stp(i).atoms[a].force(k); - idx++; - } +/** + * @brief Compute per-file R² of forces (flattening all atoms’ fx,fy,fz). + */ +t_type Analytics::calc_f_r_sq() const +{ + t_type f_r_sq_vec(st.nFiles()); + for (size_t f = 0; f < st.nFiles(); ++f) + { + // First, compute total # of force components (3 * all atoms in file f) + size_t totalAtoms = 0; + size_t nStructs = st.nStructuresInFile(f); + for (size_t i = 0; i < nStructs; ++i) { + totalAtoms += st(f,i).natoms(); } - if (i+1==st.dbidx[dbidx+1]) { - f_r_sq_vec(dbidx) = Statistics::r_sq(obs,pred); - dbidx++; - if (dbidx<st.dbidx.size()-2) { - obs.resize(3*st.calc_natoms(dbidx)); - pred.resize(3*st.calc_natoms(dbidx)); + size_t nForceComps = 3 * totalAtoms; + + t_type obs(nForceComps), pred(nForceComps); + size_t idx = 0; + + // Fill obs/pred with fx, fy, fz + for (size_t i = 0; i < nStructs; ++i) { + const auto & s1 = st(f,i); + const auto & s2 = stp(f,i); + for (size_t a = 0; a < s1.natoms(); ++a) { + obs(idx) = s1(a).fx(); + pred(idx) = s2(a).fx(); + idx++; + + obs(idx) = s1(a).fy(); + pred(idx) = s2(a).fy(); + idx++; + + obs(idx) = s1(a).fz(); + pred(idx) = s2(a).fz(); + idx++; } - idx=0; } + f_r_sq_vec(f) = Statistics::r_sq(obs, pred); } return f_r_sq_vec; } -t_type Analytics::calc_s_r_sq() const { - t_type s_r_sq_vec(st.dbidx.size()-1); - size_t dbidx=0; - t_type obs(6*st.size(dbidx)); - t_type pred(6*st.size(dbidx)); - size_t idx=0; - for (size_t i=0; i<st.size(); ++i) { - - for (size_t x=0; x<3; ++x) { - for (size_t y=x; y<3; ++y) { - obs(idx) = st(i).stress(x,y); - pred(idx) = stp(i).stress(x,y); - idx++; - } - } +/** + * @brief Compute per-file R² of stress (collecting the 6 independent components). + */ +t_type Analytics::calc_s_r_sq() const +{ + t_type s_r_sq_vec(st.nFiles()); + for (size_t f = 0; f < st.nFiles(); ++f) + { + size_t nStructs = st.nStructuresInFile(f); + // Each structure => 6 unique stress components (sxx, syy, szz, sxy, sxz, syz) + t_type obs(6 * nStructs), pred(6 * nStructs); - if (i+1==st.dbidx[dbidx+1]) { - s_r_sq_vec(dbidx) = Statistics::r_sq(obs,pred); - dbidx++; - if (dbidx<st.dbidx.size()-2) { - obs.resize(6*st.size(dbidx)); - pred.resize(6*st.size(dbidx)); + size_t idx = 0; + for (size_t i = 0; i < nStructs; ++i) { + const auto & s1 = st(f,i); + const auto & s2 = stp(f,i); + + // Fill in (x,y) for x<=y + for (size_t x = 0; x < 3; ++x) { + for (size_t y = x; y < 3; ++y) { + obs(idx) = s1.stress(x,y); + pred(idx) = s2.stress(x,y); + idx++; + } } - idx=0; } + s_r_sq_vec(f) = Statistics::r_sq(obs, pred); } return s_r_sq_vec; } + +} // namespace mlip +} // namespace tadah + diff --git a/src/atom.cpp b/src/atom.cpp deleted file mode 100644 index 164a6c48fb0cbb3a59ace6f4568c5a8bce54886e..0000000000000000000000000000000000000000 --- a/src/atom.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include <tadah/mlip/atom.h> -#include <tadah/core/periodic_table.h> -#include <iomanip> - -Atom::Atom() {} - -Atom::Atom(const Element &element, - const double px, const double py, const double pz, - const double fx, const double fy, const double fz): - Element(element), - position(px,py,pz), - force(fx,fy,fz) -{} - -std::ostream& operator<<(std::ostream& os, const Atom& atom) -{ - os << (Element&)atom; - os << "Coordinates: " << std::left << atom.position << std::endl; - os << "Force: " << std::left << atom.force << std::endl; - return os; -} -bool Atom::operator==(const Atom &a) const { - double EPSILON = std::numeric_limits<double>::epsilon(); - return - position.isApprox(a.position, EPSILON) - && force.isApprox(a.force, EPSILON) - && Element::operator==(a) - ; -} -bool Atom::is_the_same(const Atom &a, double thr) const { - return - position.isApprox(a.position, thr) - && Element::operator==(a) - ; -} diff --git a/src/castep_castep_reader.cpp b/src/castep_castep_reader.cpp index f6c38a508d7b4ab72f1f85f7685f08134427719d..9f3a787ff61304c1704371db686ffecb636e410c 100644 --- a/src/castep_castep_reader.cpp +++ b/src/castep_castep_reader.cpp @@ -6,10 +6,17 @@ #include <tadah/mlip/structure_db.h> #include <tadah/mlip/dataset_readers/castep_castep_reader.h> -CastepCastepReader::CastepCastepReader(StructureDB& db) : DatasetReader(db) {} -CastepCastepReader::CastepCastepReader(StructureDB& db, const std::string& filename) -: DatasetReader(db, filename) { +namespace tadah { +namespace mlip { +CastepCastepReader::CastepCastepReader() : DatasetReader() {} + + +CastepCastepReader::CastepCastepReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile): + DatasetReader(db, filename, fileIndex, nStructsInFile) { read_data(filename); } @@ -27,6 +34,7 @@ void CastepCastepReader::read_data(const std::string& filename) { file.close(); } +// parse_data: fill HPC DB subranges [fileIndex_, 0..nStructsInFile_-1] void CastepCastepReader::parse_data() { std::istringstream stream(raw_data_); std::string line; @@ -40,7 +48,8 @@ void CastepCastepReader::parse_data() { bool restart = false; int scf_idx = 0; - Structure s; + std::size_t structIdxInFile = 0; + Structure *s = &(*stdb)(fileIndex_, structIdxInFile); std::string label; size_t counter=0; @@ -74,7 +83,8 @@ void CastepCastepReader::parse_data() { counter++; counter++; - s = Structure(); + //s = Structure(); + s = &(*stdb)(fileIndex_, ++structIdxInFile); for (int i = 0; i < 3; ++i) { if (!std::getline(stream, line)) { std::cerr << "Warning, file" << filename << " line: " << counter << std::endl; @@ -83,7 +93,7 @@ void CastepCastepReader::parse_data() { } counter++; std::istringstream iss(line); - if (!(iss >> s.cell(0,i) >> s.cell(1,i) >> s.cell(2,i))) { + if (!(iss >> s->cell(0,i) >> s->cell(1,i) >> s->cell(2,i))) { std::cerr << "Warning, file" << filename << " line: " << counter << std::endl; std::cerr << "Warning: Unexpected end of data when reading lattice vectors at row " << i << std::endl; break; @@ -92,7 +102,7 @@ void CastepCastepReader::parse_data() { if (constant_volume) label.clear(); constant_volume = false; - if (debug) std::cout << s.cell << std::endl; + if (debug) std::cout << s->cell << std::endl; } else if (line.find("Total number of ions in cell") != std::string::npos) { @@ -138,12 +148,14 @@ void CastepCastepReader::parse_data() { if (debug) std::cout << "Cell Contents" << std::endl; if (restart) { if (debug) std::cout << "Cell Contents restart" << std::endl; - s.atoms.clear(); + // s->atoms.clear(); } else { if (debug) std::cout << "Cell Contents no restart" << std::endl; if (debug) std::cout << line << std::endl; - s = Structure(); - s.cell = stdb.structures.back().cell; // copy last cell as it is not repeated in castep for const. volume + //s = Structure(); + s = &(*stdb)(fileIndex_, ++structIdxInFile); + //s->cell = stdb.structures.back().cell; // copy last cell as it is not repeated in castep for const. volume + s->cell = (*stdb)(fileIndex_, structIdxInFile-1).cell; // copy last cell as it is not repeated in castep for const. volume } } @@ -151,7 +163,7 @@ void CastepCastepReader::parse_data() { if (debug) std::cout << "Fractional coordinates of atoms" << std::endl; // Ideally st.natoms() should be zero by now. However for constant volum GO this is not the case. // Therfore we need to clear the atoms vector but we keep the st.cell as it is. - s.atoms.clear(); + // s->atoms.clear(); if (!std::getline(stream, line) || !std::getline(stream, line)) { std::cerr << "Warning, file" << filename << " line: " << counter << std::endl; std::cerr << "Warning: Unexpected end of data when reading atom information" << std::endl; @@ -174,15 +186,21 @@ void CastepCastepReader::parse_data() { std::cerr << "Warning: Unexpected end of data when reading atomic coordiantes at row " << i << std::endl; break; } - s.add_atom(Atom(Element(type),px,py,pz,0,0,0)); - s.atoms[i].position = s.cell * s.atoms[i].position; // convert to abs + + int Z = Element::toAtomicNumber(type); + s->addAtom(Z,px,py,pz,0,0,0); + // s->atoms[i].position = s->cell * s->atoms[i].position; // convert to abs + auto posView = s->positionView(i); + transformBy(posView, s->cell); + //Vec3d temp = s->cell * s->atoms[i].position; // convert to abs + //s->x(i) = temp(0); s->y(i) = temp(1); s->z(i) = temp(2); } - if (debug) std::cout << "natoms: " << s.natoms() << std::endl; + if (debug) std::cout << "natoms: " << s->natoms() << std::endl; } else if (line.find("Cartesian components (eV/A)") != std::string::npos) { if (debug) std::cout << "Cartesian components (eV/A)" << std::endl; - if (s.natoms()!=natoms) continue; + if (s->natoms()!=natoms) continue; if (!std::getline(stream, line) || !std::getline(stream, line) || !std::getline(stream, line)) { std::cerr << "Warning, file" << filename << " line: " << counter << std::endl; std::cerr << "Warning: Unexpected end of data when reading atom forces" << std::endl; @@ -204,7 +222,8 @@ void CastepCastepReader::parse_data() { std::cerr << "Warning, file" << filename << " line: " << counter << std::endl; std::cerr << "Warning: Unexpected end of data when reading atom forces: " << line << std::endl; } else { - s.atoms[i].force = Vec3d(fx,fy,fz); + // s->atoms[i].force = Vec3d(fx,fy,fz); + s->fx(i) = fx; s->fy(i) = fy; s->fz(i) = fz; } } } @@ -227,7 +246,7 @@ void CastepCastepReader::parse_data() { counter++; std::istringstream iss(line); std::string tmp; - if (!(iss >> tmp >> tmp >> s.stress(i,0) >> s.stress(i,1) >> s.stress(i,2))) { + if (!(iss >> tmp >> tmp >> s->stress(i,0) >> s->stress(i,1) >> s->stress(i,2))) { std::cerr << "Warning, file" << filename << " line: " << counter << std::endl; std::cerr << "Warning: Unexpected end of data when reading atom forces: " << line << std::endl; } @@ -242,13 +261,13 @@ void CastepCastepReader::parse_data() { if (debug) std::cout << "Final free energy (E-TS)" << std::endl; std::istringstream iss(line); std::string tmp; - if (!(iss >> tmp >> tmp >> tmp >> tmp >> tmp >> s.energy)) { + if (!(iss >> tmp >> tmp >> tmp >> tmp >> tmp >> s->energy)) { std::cerr << "Warning, file" << filename << " line: " << counter << std::endl; std::cerr << "Warning: Unexpected end of data when reading total energy" << std::endl; } // scf only loop if (!md_run && !go_run) { - s.label = "SCF run: " + std::to_string(scf_idx++); + s->label = "SCF run: " + std::to_string(scf_idx++); complete_structure = true; } } @@ -261,7 +280,7 @@ void CastepCastepReader::parse_data() { iss >> step >> step >> step >> step; // safe as found enthalpy= std::ostringstream oss; oss << std::boolalpha << constant_volume; - s.label = "CASTEP GO, const. volume: " + oss.str() + ", step: " + step; + s->label = "CASTEP GO, const. volume: " + oss.str() + ", step: " + step; complete_structure = true; } @@ -269,7 +288,7 @@ void CastepCastepReader::parse_data() { if (debug) std::cout << "Potential Energy:" << std::endl; std::istringstream iss(line); std::string tmp; - if (!(iss >> tmp >> tmp >> tmp >> s.energy)) { + if (!(iss >> tmp >> tmp >> tmp >> s->energy)) { std::cerr << "Warning, file" << filename << " line: " << counter << std::endl; std::cerr << "Warning: Unexpected end of data when reading total energy" << std::endl; } @@ -279,28 +298,27 @@ void CastepCastepReader::parse_data() { // MD: end of iteration std::istringstream iss(line); std::string tmp; - if (!(iss >> tmp >> tmp >> s.T)) { + if (!(iss >> tmp >> tmp >> s->T)) { std::cerr << "Warning, file" << filename << " line: " << counter << std::endl; std::cerr << "Warning: Unexpected end of data when reading temperature" << std::endl; } if (!label.size())label = "CASTEP MD, const. volume: false, step: 0"; // the last option - s.label = label; + s->label = label; complete_structure = true; } if (complete_structure) { if (!stress_tensor_bool) { - s.stress.set_zero(); + s->stress.set_zero(); } else { - s.stress *= p_conv; // GPa -> eV/A^3 + s->stress *= p_conv; // GPa -> eV/A^3 } - stdb.add(s); + //stdb.add(s); complete_structure = false; } counter++; } - } void CastepCastepReader::print_summary() const { @@ -308,5 +326,52 @@ void CastepCastepReader::print_summary() const { } std::string CastepCastepReader::get_summary() const { - return stdb.summary(); + return stdb->summary(); +} + +void CastepCastepReader::pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) +{ + std::ifstream fin(filename); + if (!fin.is_open()) { + throw std::runtime_error("pass1Count: Could not open file " + filename); + } + + nStructures = 0; + atomsPerStructure.clear(); + + std::string line; + // We'll track the last known "Total number of ions in cell". + // Each time we see "Fractional coordinates of atoms", we assume + // that indicates a *new* Structure with lastNatoms atoms. + std::size_t lastNatoms = 0; + + while (std::getline(fin, line)) + { + // Example: parse "Total number of ions in cell" to get natoms + if (line.find("Total number of ions in cell") != std::string::npos) + { + // Very simplistic parse: the 8th token is the # of atoms. + std::istringstream iss(line); + std::string tmp; + int count = 0; + while (iss >> tmp) { + if (++count == 7) { + iss >> lastNatoms; + } + } + } + // Example: each time we see "Fractional coordinates of atoms", + // we treat that as the start of a new structure. + else if (line.find("Fractional coordinates of atoms") != std::string::npos) + { + nStructures++; + atomsPerStructure.push_back(lastNatoms); + } + } + fin.close(); +} + +} } diff --git a/src/castep_cell_writer.cpp b/src/castep_cell_writer.cpp index 144da27ab7715efa0a652dfc27c88ad8745a2d94..30255238716f3b2d363598dd79b05e941dbd999d 100644 --- a/src/castep_cell_writer.cpp +++ b/src/castep_cell_writer.cpp @@ -1,13 +1,17 @@ +#include <tadah/core/element.h> #include <tadah/mlip/structure.h> #include <tadah/mlip/structure_db.h> #include <tadah/mlip/dataset_writers/castep_cell_writer.h> -CastepCellWriter::CastepCellWriter(StructureDB& db) : DatasetWriter(db) {} +namespace tadah { +namespace mlip { -void CastepCellWriter::write_data(const std::string& filename, const size_t i) { +CastepCellWriter::CastepCellWriter() : DatasetWriter() {} - if (i >= stdb.size()) { - throw std::out_of_range("Index i is out of range."); +void CastepCellWriter::write_data(const StructureDB &stdb, const std::string& filename, size_t structIndex) { + + if (structIndex >= stdb.size()) { + throw std::out_of_range("Index structIndex is out of range."); } std::ofstream file(filename); @@ -15,7 +19,7 @@ void CastepCellWriter::write_data(const std::string& filename, const size_t i) { throw std::runtime_error("Could not open the file: " + filename); } - const Structure &st = stdb(i); + const Structure &st = stdb(structIndex); // write label file << "# " << st.label << std::endl; @@ -38,16 +42,19 @@ void CastepCellWriter::write_data(const std::string& filename, const size_t i) { file << "%BLOCK POSITIONS_ABS" << std::endl; for (const auto &atom : st) { + std::string symbol = Element::toSymbol(atom.atomicNumber()); file << std::right << std::fixed << std::setw(w) - << atom.symbol; + << symbol; file << std::right << std::fixed << std::setw(w) - << std::setprecision(p) << atom.position(0); + << std::setprecision(p) << atom.x(); file << std::right << std::fixed << std::setw(w) - << std::setprecision(p) << atom.position(1); + << std::setprecision(p) << atom.y(); file << std::right << std::fixed << std::setw(w) - << std::setprecision(p) << atom.position(2); + << std::setprecision(p) << atom.z(); file << std::endl; } file << "%ENDBLOCK POSITIONS_ABS " << std::endl; file.close(); } +} +} diff --git a/src/castep_geom_reader.cpp b/src/castep_geom_reader.cpp index 9cd0894ea5435131b2c0c196b74eada4f36461eb..da0c8b2a278b11afcb999c0c9386ab49579bda85 100644 --- a/src/castep_geom_reader.cpp +++ b/src/castep_geom_reader.cpp @@ -2,19 +2,28 @@ #include <tadah/mlip/dataset_readers/castep_geom_reader.h> #include <tadah/mlip/dataset_readers/castep_md_reader.h> -CastepGeomReader::CastepGeomReader(StructureDB& db) -: CastepMDReader(db) {} +namespace tadah { +namespace mlip { -CastepGeomReader::CastepGeomReader(StructureDB& db, const std::string& filename) -: CastepMDReader(db, filename) {} +CastepGeomReader::CastepGeomReader() +: CastepMDReader() {} + +CastepGeomReader::CastepGeomReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile): + CastepMDReader(db, filename, fileIndex, nStructsInFile) { +} double CastepGeomReader::calc_P_ideal(Structure &, double ) { return 0; // dummy as geom stress tensor os virial } std::string CastepGeomReader::get_first_label(std::string &step) { - return "Filename: "+filename_+ " | Units: (eV, Angstrom) | Step: " + step; + return "Filename: "+filename+ " | Units: (eV, Angstrom) | Step: " + step; } std::string CastepGeomReader::get_label(std::string &step) { return "Step: " + step; } +} +} diff --git a/src/castep_md_reader.cpp b/src/castep_md_reader.cpp index d75b260c441f298c11273da9dc44d661e446bad2..92561b2cde23af394cfda30cf812b86d4a53b71c 100644 --- a/src/castep_md_reader.cpp +++ b/src/castep_md_reader.cpp @@ -1,16 +1,21 @@ #include <tadah/core/utils.h> #include <tadah/mlip/dataset_readers/castep_md_reader.h> -CastepMDReader::CastepMDReader(StructureDB& db) -: DatasetReader(db) {} +namespace tadah { +namespace mlip { -CastepMDReader::CastepMDReader(StructureDB& db, const std::string& filename) -: DatasetReader(db, filename) { +CastepMDReader::CastepMDReader() +: DatasetReader() {} + +CastepMDReader::CastepMDReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile): + DatasetReader(db, filename, fileIndex, nStructsInFile) { read_data(filename); } void CastepMDReader::read_data(const std::string& filename) { - filename_ = filename; std::ifstream file(filename); if (!file.is_open()) { throw std::runtime_error("Could not open the file: " + filename); @@ -33,31 +38,32 @@ double CastepMDReader::calc_P_ideal(Structure &s, double T) { int N = s.natoms(); return N*k_b*T/V; } -void CastepMDReader::postproc_structure(Structure &s) { +void CastepMDReader::postproc_structure(Structure *s, size_t &structIdxInFile) { if (!stress_tensor_bool) { - s.stress.set_zero(); + s->stress.set_zero(); } else { // stress in .md file contains kinetic contributions // so we remove it to obtain pure virial stress tensor // P = N k_b T / V - s.stress -= calc_P_ideal(s,T); + s->stress -= calc_P_ideal(*s,T); } // finish conversion - s.cell *= d_conv; - s.stress *= s_conv; - s.T = T/k_b; + s->cell *= d_conv; + s->stress *= s_conv; + s->T = T/k_b; // add to database - stdb.add(s); + // stdb.add(s); // reset stress_tensor_bool = false; - s = Structure(); + s = &(*stdb)(fileIndex_, ++structIdxInFile); + // s = Structure(); } std::string CastepMDReader::get_first_label(std::string &time) { - return "Filename: "+filename_+ " | Units: (eV, Angstrom) | Time: " + return "Filename: "+filename+ " | Units: (eV, Angstrom) | Time: " + std::to_string(std::stod(time)*t_conv) + " ps"; } std::string CastepMDReader::get_label(std::string &time) { @@ -77,7 +83,9 @@ void CastepMDReader::parse_data() { std::istringstream stream(raw_data_); std::string line; - Structure s; + // Structure s; + std::size_t structIdxInFile = 0; + Structure *s = &(*stdb)(fileIndex_, structIdxInFile); size_t cell_idx=0; size_t stress_idx=0; size_t force_idx=0; @@ -88,8 +96,8 @@ void CastepMDReader::parse_data() { while (std::getline(stream, line)) { std::istringstream iss(line); if (ends_with(line,"<-- E")) { - iss >> s.energy; - s.energy *= e_conv; + iss >> s->energy; + s->energy *= e_conv; break; } iss >> time; @@ -120,8 +128,8 @@ void CastepMDReader::parse_data() { continue; } std::istringstream iss(line); - iss >> s.energy; - s.energy *= e_conv; + iss >> s->energy; + s->energy *= e_conv; E_flag=true; } else if (ends_with(line,"<-- h")) { @@ -131,7 +139,7 @@ void CastepMDReader::parse_data() { continue; } std::istringstream iss(line); - if (!(iss >> s.cell(0,cell_idx) >> s.cell(1,cell_idx) >> s.cell(2,cell_idx))) + if (!(iss >> s->cell(0,cell_idx) >> s->cell(1,cell_idx) >> s->cell(2,cell_idx))) std::cerr << "Warning: Unexpected end of data when reading lattice vectors at row " << cell_idx << std::endl; cell_idx++; H_flag++; @@ -144,7 +152,7 @@ void CastepMDReader::parse_data() { continue; } std::istringstream iss(line); - if (!(iss >> s.stress(0,stress_idx) >> s.stress(1,stress_idx) >> s.stress(2,stress_idx))) + if (!(iss >> s->stress(0,stress_idx) >> s->stress(1,stress_idx) >> s->stress(2,stress_idx))) std::cerr << "Warning: Unexpected end of data when reading stress tensor at row " << stress_idx << std::endl; stress_idx++; stress_tensor_bool = true; @@ -162,11 +170,13 @@ void CastepMDReader::parse_data() { std::istringstream iss(line); if (!(iss >> element >> temp >> px >> py >> pz)) std::cerr << "Warning: Unexpected end of data when reading atomic positions:\n" << line << std::endl; - s.add_atom(Atom(Element(element),px*d_conv,py*d_conv,pz*d_conv,0,0,0)); + // s->add_atom(Atom(Element(element),px*d_conv,py*d_conv,pz*d_conv,0,0,0)); + int Z = Element::toAtomicNumber(element); + s->addAtom(Z,px*d_conv,py*d_conv,pz*d_conv,0,0,0); R_flag=true; } else if (ends_with(line,"<-- F")) { - if (/* !T_flag || */ !E_flag || H_flag!=3 || !R_flag || force_idx==s.natoms() || complete_structure) { + if (/* !T_flag || */ !E_flag || H_flag!=3 || !R_flag || force_idx==s->natoms() || complete_structure) { error=true; continue; } @@ -176,16 +186,19 @@ void CastepMDReader::parse_data() { std::istringstream iss(line); if (!(iss >> element >> temp >> fx >> fy >> fz)) std::cerr << "Warning: Unexpected end of data when reading atomic forces:\n" << line << std::endl; - s.atoms[force_idx].force[0]=fx*f_conv; - s.atoms[force_idx].force[1]=fy*f_conv; - s.atoms[force_idx].force[2]=fz*f_conv; + // s->atoms[force_idx].force[0]=fx*f_conv; + // s->atoms[force_idx].force[1]=fy*f_conv; + // s->atoms[force_idx].force[2]=fz*f_conv; + s->fx(force_idx) = fx*f_conv; + s->fy(force_idx) = fy*f_conv; + s->fz(force_idx) = fz*f_conv; force_idx++; F_flag=true; - if (force_idx==s.natoms()) complete_structure=true; + if (force_idx==s->natoms()) complete_structure=true; } else if (is_blank_line(line)) { if (!error && complete_structure) - postproc_structure(s); + postproc_structure(s, structIdxInFile); error=false; T_flag=false; @@ -201,7 +214,7 @@ void CastepMDReader::parse_data() { std::string time; std::istringstream iss(line); iss >> time; - s.label = !stdb.size() ? get_first_label(time) : get_label(time); + s->label = !stdb->size() ? get_first_label(time) : get_label(time); } cell_idx=0; @@ -212,8 +225,8 @@ void CastepMDReader::parse_data() { } // in case there is no blank line at the end we have to add last strcuture here - if (s.natoms() && !error && complete_structure) { - postproc_structure(s); + if (s->natoms() && !error && complete_structure) { + postproc_structure(s, structIdxInFile); } } @@ -222,5 +235,51 @@ void CastepMDReader::print_summary() const { } std::string CastepMDReader::get_summary() const { - return stdb.summary(); + return stdb->summary(); +} +void CastepMDReader::pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) +{ + std::ifstream fin(filename); + if (!fin.is_open()) { + throw std::runtime_error("pass1Count: Could not open file " + filename); + } + + nStructures = 0; + atomsPerStructure.clear(); + + std::string line; + // We'll track the last known "Total number of ions in cell". + // Each time we see "Fractional coordinates of atoms", we assume + // that indicates a *new* Structure with lastNatoms atoms. + std::size_t lastNatoms = 0; + + while (std::getline(fin, line)) + { + // Example: parse "Total number of ions in cell" to get natoms + if (line.find("Total number of ions in cell") != std::string::npos) + { + // Very simplistic parse: the 8th token is the # of atoms. + std::istringstream iss(line); + std::string tmp; + int count = 0; + while (iss >> tmp) { + if (++count == 7) { + iss >> lastNatoms; + } + } + } + // Example: each time we see "Fractional coordinates of atoms", + // we treat that as the start of a new structure. + else if (line.find("Fractional coordinates of atoms") != std::string::npos) + { + nStructures++; + atomsPerStructure.push_back(lastNatoms); + } + } + fin.close(); } + +} // namespace mlip +} // namespace tadah diff --git a/src/dataset_reader.cpp b/src/dataset_reader.cpp index 048bbd6b1481a5f76ec8306541b701b6dd259a68..da5ac6eb752f60e23eaacea7bf1e758a501275a4 100644 --- a/src/dataset_reader.cpp +++ b/src/dataset_reader.cpp @@ -1,4 +1,10 @@ #include <tadah/mlip/dataset_readers/dataset_reader.h> +namespace tadah { +namespace mlip { + std::string DatasetReader::get_first_label(std::string &) { return std::string(); } std::string DatasetReader::get_label(std::string &) { return std::string(); } + +} // namespace mlip +} // namespace tadah diff --git a/src/dataset_reader_selector.cpp b/src/dataset_reader_selector.cpp index ef5d8b1cd6ce3d692796a424c5cb36f4b8a1353c..cb6117016fd7a71c44781641ffef4c4547843fd1 100644 --- a/src/dataset_reader_selector.cpp +++ b/src/dataset_reader_selector.cpp @@ -8,21 +8,45 @@ #include <fstream> #include <iostream> +namespace tadah { +namespace mlip { + // Factory method implementation -std::unique_ptr<DatasetReader> DatasetReaderSelector::get_reader(const std::string& filepath, StructureDB& db) { +std::unique_ptr<DatasetReader> DatasetReaderSelector::get_reader(StructureDB* db, + const std::string& filepath, + std::size_t fileIndex, + std::size_t nStructsInFile) { std::string type = determine_file_type_by_content(filepath); + if (type == "CASTEP.CASTEP") { + return std::make_unique<CastepCastepReader>(db,filepath,fileIndex,nStructsInFile); + } else if (type == "CASTEP.MD") { + return std::make_unique<CastepMDReader>(db,filepath,fileIndex,nStructsInFile); + } else if (type == "CASTEP.GEOM") { + return std::make_unique<CastepGeomReader>(db,filepath,fileIndex,nStructsInFile); + } else if (type == "VASP.VASPRUN") { + return std::make_unique<VaspVasprunReader>(db,filepath,fileIndex,nStructsInFile); + } else if (type == "VASP.OUTCAR") { + return std::make_unique<VaspOutcarReader>(db,filepath,fileIndex,nStructsInFile); + } else { + std::cerr << "Unknown type! Returning nullptr." << std::endl; + return nullptr; + } +} +std::unique_ptr<DatasetReader> DatasetReaderSelector::get_reader(const std::string& filepath) { + + std::string type = determine_file_type_by_content(filepath); if (type == "CASTEP.CASTEP") { - return std::make_unique<CastepCastepReader>(db,filepath); + return std::make_unique<CastepCastepReader>(); } else if (type == "CASTEP.MD") { - return std::make_unique<CastepMDReader>(db,filepath); + return std::make_unique<CastepMDReader>(); } else if (type == "CASTEP.GEOM") { - return std::make_unique<CastepGeomReader>(db,filepath); + return std::make_unique<CastepGeomReader>(); } else if (type == "VASP.VASPRUN") { - return std::make_unique<VaspVasprunReader>(db,filepath); + return std::make_unique<VaspVasprunReader>(); } else if (type == "VASP.OUTCAR") { - return std::make_unique<VaspOutcarReader>(db,filepath); + return std::make_unique<VaspOutcarReader>(); } else { std::cerr << "Unknown type! Returning nullptr." << std::endl; return nullptr; @@ -59,3 +83,5 @@ std::string DatasetReaderSelector::determine_file_type_by_content(const std::str return "Unknown file type"; } +} // namespace mlip +} // namespace tadah diff --git a/src/dataset_writer_selector.cpp b/src/dataset_writer_selector.cpp index c038bedb81b2a239201601371d1b15761a39933d..2aa8222c964f29de0f3d066ef94cea04a4c66ed2 100644 --- a/src/dataset_writer_selector.cpp +++ b/src/dataset_writer_selector.cpp @@ -3,18 +3,23 @@ #include <tadah/mlip/dataset_writers/vasp_poscar_writer.h> #include <tadah/mlip/dataset_writers/lammps_structure_writer.h> +namespace tadah { +namespace mlip { + // Factory method implementation -std::unique_ptr<DatasetWriter> DatasetWriterSelector::get_writer(const std::string& type, StructureDB& db) { +std::unique_ptr<DatasetWriter> DatasetWriterSelector::get_writer(std::string type) { if (type == "CASTEP") { - return std::make_unique<CastepCellWriter>(db); + return std::make_unique<CastepCellWriter>(); } else if (type == "VASP") { - return std::make_unique<VaspPoscarWriter>(db); + return std::make_unique<VaspPoscarWriter>(); } else if (type == "LAMMPS") { - return std::make_unique<LammpsStructureWriter>(db); + return std::make_unique<LammpsStructureWriter>(); } else { std::cerr << "Unknown type! Returning nullptr." << std::endl; return nullptr; } } +} // namespace mlip +} // namespace tadah diff --git a/src/dm_bf_base.cpp b/src/dm_bf_base.cpp index 946f455176b336eb33ee5e35f45d76ef6177a422..ef752f21b7505998204819afc9cd23d45b095f8f 100644 --- a/src/dm_bf_base.cpp +++ b/src/dm_bf_base.cpp @@ -1,9 +1,13 @@ #include "tadah/mlip/design_matrix/functions/dm_function_base.h" #include "tadah/models/functions/basis_functions/bf_base.h" #include <tadah/mlip/design_matrix/functions/basis_functions/dm_bf_base.h> +namespace tadah { +namespace mlip { DM_BF_Base::DM_BF_Base() {} DM_BF_Base::DM_BF_Base(const Config &c): Function_Base(c), BF_Base(c), DM_Function_Base(c) {} DM_BF_Base::~DM_BF_Base() {} +} +} diff --git a/src/dm_bf_linear.cpp b/src/dm_bf_linear.cpp index 4f5dac29f3e518f3aeb8059c25a729f782793551..7d576fa403b4c5a6e1d11b877a12f1b6fbc477b4 100644 --- a/src/dm_bf_linear.cpp +++ b/src/dm_bf_linear.cpp @@ -3,6 +3,8 @@ //CONFIG::Registry<DM_Function_Base>::Register<DM_BF_Linear> DM_BF_Linear_1( "BF_Linear" ); //CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_BF_Linear> DM_BF_Linear_2( "BF_Linear" ); +namespace tadah { +namespace mlip { DM_BF_Linear::DM_BF_Linear() {} DM_BF_Linear::DM_BF_Linear(const Config &c): Function_Base(c), @@ -69,3 +71,5 @@ void DM_BF_Linear::calc_phi_stress_rows(phi_type &Phi, size_t &row, } row += 6; } +} +} diff --git a/src/dm_bf_polynomial2.cpp b/src/dm_bf_polynomial2.cpp index c7fb188a21aa82c4fe0575f8bb130a8b4ad76b99..ddc417e92dd2c6f0cd9450d221fe6a49d62d3f42 100644 --- a/src/dm_bf_polynomial2.cpp +++ b/src/dm_bf_polynomial2.cpp @@ -5,6 +5,8 @@ //CONFIG::Registry<DM_Function_Base>::Register<DM_BF_Polynomial2> DM_BF_Polynomial2_1( "BF_Polynomial2" ); //CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_BF_Polynomial2> DM_BF_Polynomial2_2( "BF_Polynomial2" ); +namespace tadah { +namespace mlip { DM_BF_Polynomial2::DM_BF_Polynomial2() {} DM_BF_Polynomial2::DM_BF_Polynomial2(const Config &c): Function_Base(c), @@ -99,3 +101,5 @@ void DM_BF_Polynomial2::calc_phi_stress_rows(phi_type &Phi, } row += 6; } +} +} diff --git a/src/dm_f_all.cpp b/src/dm_f_all.cpp index 64a583a2ead0ea791730a3e0b5d00f8d0e78e84c..924e3af89b6ed0cbba5695e66431bbc6f2b118cf 100644 --- a/src/dm_f_all.cpp +++ b/src/dm_f_all.cpp @@ -1,5 +1,7 @@ #include <tadah/mlip/design_matrix/functions/dm_f_all.h> +namespace tadah { +namespace mlip { template<> CONFIG::Registry<DM_Function_Base>::Map CONFIG::Registry<DM_Function_Base>::registry{}; template<> CONFIG::Registry<DM_Function_Base,Config&>::Map CONFIG::Registry<DM_Function_Base,Config&>::registry{}; @@ -20,3 +22,5 @@ CONFIG::Registry<DM_Function_Base>::Register<DM_Kern_RBF> DM_Kern_RBF_1( "Kern_R CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_Kern_RBF> DM_Kern_RBF_2( "Kern_RBF" ); CONFIG::Registry<DM_Function_Base>::Register<DM_Kern_Sigmoid> DM_Kern_Sigmoid_1( "Kern_Sigmoid" ); CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_Kern_Sigmoid> DM_Kern_Sigmoid_2( "Kern_Sigmoid" ); +} +} diff --git a/src/dm_function_base.cpp b/src/dm_function_base.cpp index 945c4344841ae7c84c896e0cd66ac226cf2a1cdb..7eb5063d74f666e795f909a28cd4266cab6f4c22 100644 --- a/src/dm_function_base.cpp +++ b/src/dm_function_base.cpp @@ -1,5 +1,9 @@ #include "tadah/models/functions/function_base.h" #include <tadah/mlip/design_matrix/functions/dm_function_base.h> +namespace tadah { +namespace mlip { DM_Function_Base::DM_Function_Base() {} DM_Function_Base::DM_Function_Base(const Config &c): Function_Base(c) {} DM_Function_Base::~DM_Function_Base() {} +} +} diff --git a/src/dm_kern_base.cpp b/src/dm_kern_base.cpp index f78c71e6bc3b629f6b5fac6e5af774acb86d1a8e..f3fe327eb789621b2c985d0dbd210364d4ec4e91 100644 --- a/src/dm_kern_base.cpp +++ b/src/dm_kern_base.cpp @@ -3,6 +3,8 @@ #include "tadah/models/functions/kernels/kern_base.h" #include <tadah/mlip/design_matrix/functions/kernels/dm_kern_base.h> +namespace tadah { +namespace mlip { DM_Kern_Base::~DM_Kern_Base() {} DM_Kern_Base::DM_Kern_Base() {} DM_Kern_Base::DM_Kern_Base(const Config &c): @@ -73,3 +75,5 @@ void DM_Kern_Base::calc_phi_stress_rows(phi_type &Phi, size_t &row, const doubl } row+=6; } +} +} diff --git a/src/dm_kern_linear.cpp b/src/dm_kern_linear.cpp index 84c484b269e73f11dfc2fcf7c31cb3ead2c9d477..dc12670db07e33d890d493067d0961ebb12b2319 100644 --- a/src/dm_kern_linear.cpp +++ b/src/dm_kern_linear.cpp @@ -4,6 +4,8 @@ //CONFIG::Registry<DM_Function_Base>::Register<DM_Kern_Linear> DM_Kern_Linear_1( "Kern_Linear" ); //CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_Kern_Linear> DM_Kern_Linear_2( "Kern_Linear" ); +namespace tadah { +namespace mlip { DM_Kern_Linear::DM_Kern_Linear() {} DM_Kern_Linear::DM_Kern_Linear (const Config &c): Function_Base(c), @@ -71,3 +73,5 @@ void DM_Kern_Linear::calc_phi_stress_rows(phi_type &Phi, size_t &row, } row += 6; } +} +} diff --git a/src/dm_kern_lq.cpp b/src/dm_kern_lq.cpp index dbef59e32ef1d37e7899c0f43994a076948f3374..5f97ee18933a443e2250780a4ecaa1247684c125 100644 --- a/src/dm_kern_lq.cpp +++ b/src/dm_kern_lq.cpp @@ -5,6 +5,8 @@ //CONFIG::Registry<DM_Function_Base>::Register<DM_Kern_LQ> DM_Kern_LQ_1( "Kern_LQ" ); //CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_Kern_LQ> DM_Kern_LQ_2( "Kern_LQ" ); +namespace tadah { +namespace mlip { DM_Kern_LQ::DM_Kern_LQ() {} DM_Kern_LQ::DM_Kern_LQ(const Config &c): @@ -12,3 +14,5 @@ DM_Kern_LQ::DM_Kern_LQ(const Config &c): DM_Kern_Base(c), Kern_LQ(c) {} +} +} diff --git a/src/dm_kern_polynomial.cpp b/src/dm_kern_polynomial.cpp index e8637b1625d9647309ae46496f0f63263a464b3e..817597d2d0acf990d23b09dc556dc2973ec04408 100644 --- a/src/dm_kern_polynomial.cpp +++ b/src/dm_kern_polynomial.cpp @@ -3,6 +3,8 @@ //CONFIG::Registry<DM_Function_Base>::Register<DM_Kern_Polynomial> DM_Kern_Polynomial_1( "Kern_Polynomial" ); //CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_Kern_Polynomial> DM_Kern_Polynomial_2( "Kern_Polynomial" ); +namespace tadah { +namespace mlip { DM_Kern_Polynomial::DM_Kern_Polynomial() {} DM_Kern_Polynomial::DM_Kern_Polynomial(const Config &c): @@ -10,3 +12,5 @@ DM_Kern_Polynomial::DM_Kern_Polynomial(const Config &c): DM_Kern_Base(c), Kern_Polynomial(c) {} +} +} diff --git a/src/dm_kern_quadratic.cpp b/src/dm_kern_quadratic.cpp index 72ab0d47e6ca382c31204f4c2baaf54e837b09d0..6ec1aa82d6a53786f2eb06d82461b7a344618809 100644 --- a/src/dm_kern_quadratic.cpp +++ b/src/dm_kern_quadratic.cpp @@ -4,6 +4,8 @@ //CONFIG::Registry<DM_Function_Base>::Register<DM_Kern_Quadratic> DM_Kern_Quadratic_1( "Kern_Quadratic" ); //CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_Kern_Quadratic> DM_Kern_Quadratic_2( "Kern_Quadratic" ); +namespace tadah { +namespace mlip { DM_Kern_Quadratic::DM_Kern_Quadratic() {} DM_Kern_Quadratic::DM_Kern_Quadratic(const Config &c): @@ -11,3 +13,5 @@ DM_Kern_Quadratic::DM_Kern_Quadratic(const Config &c): DM_Kern_Base(c), Kern_Quadratic(c) {} +} +} diff --git a/src/dm_kern_rbf.cpp b/src/dm_kern_rbf.cpp index 41d57bea8461d633b3241263b4ccc5905fb00bfe..2aa068a7e57e2d94d1384f1fdf3f8a9194cb3ecf 100644 --- a/src/dm_kern_rbf.cpp +++ b/src/dm_kern_rbf.cpp @@ -3,6 +3,8 @@ //CONFIG::Registry<DM_Function_Base>::Register<DM_Kern_RBF> DM_Kern_RBF_1( "Kern_RBF" ); //CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_Kern_RBF> DM_Kern_RBF_2( "Kern_RBF" ); +namespace tadah { +namespace mlip { DM_Kern_RBF::DM_Kern_RBF() {} DM_Kern_RBF::DM_Kern_RBF(const Config &c): @@ -10,3 +12,5 @@ DM_Kern_RBF::DM_Kern_RBF(const Config &c): DM_Kern_Base(c), Kern_RBF(c) {} +} +} diff --git a/src/dm_kern_sigmoid.cpp b/src/dm_kern_sigmoid.cpp index 1c3d25bfc432591378a3c4948d76ac3ea6965ecd..a75f03109674ffe4f7fc4c13fc9443023f37ff2c 100644 --- a/src/dm_kern_sigmoid.cpp +++ b/src/dm_kern_sigmoid.cpp @@ -3,6 +3,8 @@ //CONFIG::Registry<DM_Function_Base>::Register<DM_Kern_Sigmoid> DM_Kern_Sigmoid_1( "Kern_Sigmoid" ); //CONFIG::Registry<DM_Function_Base,Config&>::Register<DM_Kern_Sigmoid> DM_Kern_Sigmoid_2( "Kern_Sigmoid" ); +namespace tadah { +namespace mlip { DM_Kern_Sigmoid::DM_Kern_Sigmoid() {} DM_Kern_Sigmoid::DM_Kern_Sigmoid(const Config &c): @@ -10,3 +12,5 @@ DM_Kern_Sigmoid::DM_Kern_Sigmoid(const Config &c): DM_Kern_Base(c), Kern_Sigmoid(c) {} +} +} diff --git a/src/lammps_structure_writer.cpp b/src/lammps_structure_writer.cpp index e634f3837cb870add5d88fb12e086c10472bbd5e..87f14f57cc91aafb73f2a5207bf1ae92ec233511 100644 --- a/src/lammps_structure_writer.cpp +++ b/src/lammps_structure_writer.cpp @@ -2,12 +2,15 @@ #include <tadah/mlip/structure_db.h> #include <tadah/mlip/dataset_writers/lammps_structure_writer.h> -LammpsStructureWriter::LammpsStructureWriter(StructureDB& db) : DatasetWriter(db) {} +namespace tadah { +namespace mlip { -void LammpsStructureWriter::write_data(const std::string& filename, const size_t i) { +LammpsStructureWriter::LammpsStructureWriter() : DatasetWriter() {} - if (i >= stdb.size()) { - throw std::out_of_range("Index i is out of range."); +void LammpsStructureWriter::write_data(const StructureDB &stdb, const std::string& filename, size_t structIndex) { + + if (structIndex >= stdb.size()) { + throw std::out_of_range("Index structIndex is out of range."); } std::ofstream file(filename); @@ -15,7 +18,7 @@ void LammpsStructureWriter::write_data(const std::string& filename, const size_t throw std::runtime_error("Could not open the file: " + filename); } - const Structure &st = stdb(i); + const Structure &st = stdb(structIndex); // compute number of atoms for a given element const auto &elements = st.get_unique_elements(); @@ -86,3 +89,5 @@ void LammpsStructureWriter::write_data(const std::string& filename, const size_t file.close(); } +} // namespace mlip +} // namespace tadah diff --git a/src/m_all.cpp b/src/m_all.cpp index c8387e66f3a0677daf42b1c2a97e5b478ca85bf6..5f502149af1710358b2a11532d23e13a31f5a095 100644 --- a/src/m_all.cpp +++ b/src/m_all.cpp @@ -2,6 +2,8 @@ #include <tadah/models/memory/IModelsWorkspaceManager.h> #include <tadah/mlip/memory/IMLIPWorkspaceManager.h> +namespace tadah { +namespace mlip { template<> CONFIG::Registry<M_Tadah_Base, DM_Function_Base&, Config&>::Map CONFIG::Registry<M_Tadah_Base, DM_Function_Base&, Config&>::registry{}; @@ -22,3 +24,5 @@ CONFIG::Registry<M_Tadah_Base, DM_Function_Base&, Config&, tadah::mlip::memory:: CONFIG::Registry<M_Tadah_Base, DM_Function_Base&, Config&, tadah::mlip::memory::IMLIPWorkspaceManager&>::Register<M_BLR<>> M_BLR_3("M_BLR"); CONFIG::Registry<M_Tadah_Base, DM_Function_Base&, Config&, tadah::mlip::memory::IMLIPWorkspaceManager&>::Register<M_KRR<>> M_KRR_3("M_KRR"); +} +} diff --git a/src/m_blr.cpp b/src/m_blr.cpp index bdb85a73602412308a15ce55a4977f548d0ba742..d7fe68fdb14a7c655896f0a9bc18feb4e4b7eafb 100644 --- a/src/m_blr.cpp +++ b/src/m_blr.cpp @@ -1,2 +1,6 @@ #include <tadah/mlip/models/m_blr.h> #include <tadah/core/registry.h> +namespace tadah { +namespace mlip { +} +} diff --git a/src/m_krr.cpp b/src/m_krr.cpp index d3cff341128e424be5e57afe5723901340524082..dce40ec69bdbfdc6ee789f48db48ce909f3b3b8b 100644 --- a/src/m_krr.cpp +++ b/src/m_krr.cpp @@ -1,2 +1,6 @@ #include <tadah/mlip/models/m_krr.h> #include <tadah/core/registry.h> +namespace tadah { +namespace mlip { +} +} diff --git a/src/m_tadah_base.cpp b/src/m_tadah_base.cpp index bdba3571b40fd87a99c37fa4e1bab55fdd69952f..b1ea7af6adfd20d81a208879d6c06566ca0ffe21 100644 --- a/src/m_tadah_base.cpp +++ b/src/m_tadah_base.cpp @@ -1,5 +1,7 @@ #include <tadah/mlip/models/m_tadah_base.h> +namespace tadah { +namespace mlip { void M_Tadah_Base:: fpredict(const size_t a, force_type &v, const StDescriptors &std, const Structure &st) @@ -166,3 +168,5 @@ stress_force_predict(const StDescriptors &std, Structure &st_) } st_.stress = s; } +} +} diff --git a/src/nn_finder.cpp b/src/nn_finder.cpp index 81569cb2ca6af9db7a8eb56d8c06a919c964121d..369fa1e399c2145d0c7972294f992e47a7c54103 100644 --- a/src/nn_finder.cpp +++ b/src/nn_finder.cpp @@ -8,6 +8,8 @@ #include <iostream> #include <vector> +namespace tadah { +namespace mlip { // Constructor NNFinder::NNFinder(Config &config) { @@ -284,3 +286,5 @@ void NNFinder::calc(StructureDB &stdb) << seconds << " seconds\n"; } +} +} diff --git a/src/st_descriptors.cpp b/src/st_descriptors.cpp index a0407e8e1eb6f130332b15ad4121ab2bea6a14d2..25b14f46a439635dc12993179f7dcec829f37627 100644 --- a/src/st_descriptors.cpp +++ b/src/st_descriptors.cpp @@ -1,5 +1,7 @@ #include <tadah/mlip/st_descriptors.h> +namespace tadah { +namespace mlip { StDescriptors::StDescriptors(const Structure &s, const Config &c): // fully initialize aed aeds(c.get<size_t>("DSIZE"), s.natoms()), @@ -47,3 +49,5 @@ size_t StDescriptors::naed() const { size_t StDescriptors::dim() const { return aeds.rows(); } +} +} diff --git a/src/st_descriptors_db.cpp b/src/st_descriptors_db.cpp index 3dce74f8c5e21d70596c39a810406923ebbd990c..d7b5b436e9b6b444798c2c114e5ca603bff95045 100644 --- a/src/st_descriptors_db.cpp +++ b/src/st_descriptors_db.cpp @@ -1,5 +1,7 @@ #include <tadah/mlip/st_descriptors_db.h> +namespace tadah { +namespace mlip { StDescriptorsDB::StDescriptorsDB(const StructureDB &stdb, Config &config): st_descs(stdb.size()) { @@ -33,3 +35,5 @@ std::vector<StDescriptors>::const_iterator StDescriptorsDB::begin() const { std::vector<StDescriptors>::const_iterator StDescriptorsDB::end() const { return st_descs.cend(); } +} +} diff --git a/src/statistics.cpp b/src/statistics.cpp index 255d86761eb19c2008b62246bdf9f8ea181677bd..6447240f45326e36262fd658fb986d0750647489 100644 --- a/src/statistics.cpp +++ b/src/statistics.cpp @@ -2,6 +2,8 @@ #include <cmath> +namespace tadah { +namespace mlip { double Statistics::res_sum_sq(const vec &obs, const vec &pred) { if (obs.size() != pred.size()) throw std::runtime_error("Containers differ in size."); @@ -33,3 +35,5 @@ double Statistics::variance(const vec &v) { double Statistics::mean(const vec &v) { return v.mean(); } +} +} diff --git a/src/structure.cpp b/src/structure.cpp deleted file mode 100644 index ee3ca19c640ae715b5e7d92070e5cebadf3e023d..0000000000000000000000000000000000000000 --- a/src/structure.cpp +++ /dev/null @@ -1,354 +0,0 @@ -#include <tadah/mlip/structure.h> -#include <tadah/core/periodic_table.h> - -#include <stdexcept> -#include <cmath> - -Structure::Structure() { - PeriodicTable::initialize(); -} - -const Atom &Structure::operator()(const size_t i) const{ - return atoms[i]; -} - -void Structure::add_atom(Atom a) { - atoms.push_back(a); -} -void Structure::remove_atom(const size_t i) { - atoms.erase(atoms.begin()+i); -} - -Vec3d Structure::nn_pos(const size_t i, const size_t n) const -{ - // (A) Global index of this neighbor - const size_t neighborIndex = near_neigh_idx[i][n]; - - // (B) Atom's original "unshifted" position: - const Vec3d &posNeighbor = atoms[neighborIndex].position; - - // (C) Convert the stored shift -> real displacement shiftDisp - // If near_neigh_shift[i][n] is an integer triple (n1, n2, n3), - // multiply by the cell. Otherwise, if it's already in real space, - // you can just do: Vec3d shiftDisp = near_neigh_shift[i][n]. - Vec3d shift = near_neigh_shift[i][n]; // might be (n1, n2, n3) - - // If shift is integer triple (n1, n2, n3), multiply with cell: - Vec3d shiftDisp; - shiftDisp[0] = shift[0]*cell(0,0) + shift[1]*cell(0,1) + shift[2]*cell(0,2); - shiftDisp[1] = shift[0]*cell(1,0) + shift[1]*cell(1,1) + shift[2]*cell(1,2); - shiftDisp[2] = shift[0]*cell(2,0) + shift[1]*cell(2,1) + shift[2]*cell(2,2); - - // (D) Final neighbor position = unshifted position + shiftDisp - Vec3d posShifted = posNeighbor + shiftDisp; - return posShifted; -} - -size_t Structure::natoms() const { - return atoms.size(); -} - -size_t Structure::nn_size(size_t i) const { - return near_neigh_idx[i].size(); -} - -int Structure::read(std::ifstream &ifs) { - std::string line; - // ignore extra empty lines - std::getline(ifs,line); - if(line.empty()) return 1; - - // OK, process structural data - // read first line as a label - label = line; - - // the second line could be energy or - // a scalling factors eweight fweight sweight - std::getline(ifs,line); - std::stringstream stream(line); - std::string temp; - size_t count = 0; - while(stream >> temp) { ++count;} - - if (count == 3) { - stream.clear(); - stream.seekg(0, std::ios::beg); - stream >> eweight >> fweight >> sweight; - std::getline(ifs,line); - std::stringstream stream2(line); - // energy and T - stream2 >> energy; - if(stream2) stream2 >> T; - } - else if (count == 2) { - stream.clear(); - stream.seekg(0, std::ios::beg); - // energy and T - stream >> energy >> T; - } - else { - energy = std::stod(line); - } - - // 3 lines, 9 elements are for the cell matrix - for (int i=0; i<3; ++i) - for (int j=0; j<3; ++j) - ifs >> cell(i,j); - - // 3 lines, 9 elements are for the stress matrix - // if (use_stress) - for (int i=0; i<3; ++i) - for (int j=0; j<3; ++j) - ifs >> stress(i,j); - - // move to next line - std::getline(ifs,line); - - // make sure atoms vector is empty - atoms.clear(); - - char symbol[3]; - double px,py,pz,fx,fy,fz; - while(std::getline(ifs,line)) { - if(line.empty()) break; - //if(line == " ") break; - if(line == "\r") break; // detects windows newline - - std::istringstream tmp(line); - tmp >> symbol >> px >> py >> pz >> fx >> fy >> fz; - Element e = PeriodicTable::find_by_symbol(symbol); - atoms.push_back(Atom(e,px,py,pz,fx,fy,fz)); - //Atom &a = atoms.back(); - - //tmp >> a.label >> a.position(0) >> a.position(1) >> a.position(2); - // if (use_force) - //tmp >> a.force(0) >> a.force(1) >> a.force(2); - //std::cout << a.force(0) << " " << a.force(1) << " " << a.force(2) << std::endl;; - } - return 0; -} - -void Structure::read(const std::string fn) { - std::ifstream ifs(fn); - if (!ifs.is_open()) { - throw std::runtime_error("File does not exist: "+fn); - } - read(ifs); - ifs.close(); -} - -void Structure::save(std::ofstream &ofs) const { - ofs << label << std::endl; - ofs << eweight << " " << fweight << " " << sweight << std::endl; - ofs << energy << std::endl; - for (int i=0; i<3; ++i) { - ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,0); - ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,1); - ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,2); - ofs << std::endl; - } - - for (int i=0; i<3; ++i) { - ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,0); - ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,1); - ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,2); - ofs << std::endl; - } - - for (const Atom &a:atoms) { - ofs << a.symbol[0] << a.symbol[1]; - ofs << std::setw(w-2) << std::fixed << std::right - << std::setprecision(p) << a.position(0); - ofs << std::setw(w) << std::fixed << std::right - << std::setprecision(p) << a.position(1); - ofs << std::setw(w) << std::fixed << std::right - << std::setprecision(p) << a.position(2); - ofs << std::setw(w) << std::fixed << std::right - << std::setprecision(p) << a.force(0); - ofs << std::setw(w) << std::fixed << std::right - << std::setprecision(p) << a.force(1); - ofs << std::setw(w) << std::fixed << std::right - << std::setprecision(p) << a.force(1); - ofs << std::endl; - } -} - -void Structure::save(const std::string fn) const { - std::ofstream ofs(fn); - save(ofs); - ofs.close(); -} -size_t Structure::get_nn_iindex(const size_t i, const size_t j, const size_t jj) const { - Vec3d shift_ijj = -near_neigh_shift[i][jj]; - size_t ii = 0; - - for (size_t x=0; x<near_neigh_idx[j].size(); ++x) { - if (near_neigh_idx[j][x] == i) - if (near_neigh_shift[j][ii] == shift_ijj) - break; - ii++; - } - return ii; - -} -double Structure::get_volume() const { - return cell.row(0)*(cell.row(1).cross(cell.row(2))); -} -double Structure::get_density() const { - double V = get_volume(); - V*=1e-24; // convert to cm^3 - double amu = 1.66053906660e-24; // g - double mass = 0; - for (const auto& a:atoms) mass += PeriodicTable::get_mass(a.Z); - return amu*mass/V; -} - -double Structure::get_temperature() const { - return T; -} - -double Structure::get_virial_pressure() const { - return stress.trace()/get_volume()/3; -} - -double Structure::get_pressure(double T, double kB) const { - double vpress = get_virial_pressure(); - return vpress + natoms()*kB*T/get_volume(); -} -bool Structure::operator==(const Structure& st) const -{ - double EPSILON = std::numeric_limits<double>::epsilon(); - bool result = - cell.isApprox(st.cell, EPSILON) - && stress.isApprox(st.stress, EPSILON) - && natoms()==st.natoms() - && (std::fabs(eweight-st.eweight) < EPSILON) - && (std::fabs(fweight-st.fweight) < EPSILON) - && (std::fabs(sweight-st.sweight) < EPSILON) - && (std::fabs(energy-st.energy) < EPSILON) - ; - if (!result) return result; - for (size_t i=0;i<natoms();++i) { - result = atoms[i]==st.atoms[i]; - if (!result) return result; - } - return result; -} -bool Structure::is_the_same(const Structure& st, double thr) const -{ - bool result = - cell.isApprox(st.cell, thr) - && natoms()==st.natoms(); - if (!result) return result; - - size_t count=0;; - for (size_t i=0;i<natoms();++i) { - for (size_t j=i;j<natoms();++j) { - result = atoms[i].is_the_same(st.atoms[j], thr); - if (result) count++; - } - } - - return count==natoms() ? true : false; -} -int Structure::next_structure(std::ifstream &ifs) { - std::string line; - std::getline(ifs,line); - if(line.empty()) return 0; - - std::getline(ifs,line); - // the second line could be energy or - // a scalling factors eweight fweight sweight - std::stringstream stream(line); - std::string temp; - size_t count = 0; - while(stream >> temp) { ++count;} - // optional if second line is a weight - if (count==3) - std::getline(ifs,line); - - for (size_t i=0; i<6;++i) - std::getline(ifs,line); - - int natoms=0; - while(std::getline(ifs,line)) { - if(line.empty()) break; - if(line == "\r") break; // detects windows newline - natoms++; - } - return natoms; -} -void Structure::clear_nn() { - near_neigh_shift.clear(); - near_neigh_idx.clear(); -} - -std::vector<Atom>::iterator Structure::begin() { - return atoms.begin(); -} - -std::vector<Atom>::iterator Structure::end() { - return atoms.end(); -} - -std::vector<Atom>::const_iterator Structure::begin() const { - return atoms.cbegin(); -} - -std::vector<Atom>::const_iterator Structure::end() const { - return atoms.cend(); -} -void Structure::dump_to_file(std::ostream& file, size_t prec) const { - const int n = 5; - file << label << std::endl; - file << std::fixed << std::setprecision(prec); - file << eweight << " " << fweight << " " << sweight << std::endl; - file << energy << " " << T << std::endl; - - file - << std::setw(prec+n) << cell(0,0) << " " - << std::setw(prec+n) << cell(0,1) << " " - << std::setw(prec+n) << cell(0,2) << " " << std::endl - << std::setw(prec+n) << cell(1,0) << " " - << std::setw(prec+n) << cell(1,1) << " " - << std::setw(prec+n) << cell(1,2) << " " << std::endl - << std::setw(prec+n) << cell(2,0) << " " - << std::setw(prec+n) << cell(2,1) << " " - << std::setw(prec+n) << cell(2,2) << " " << std::endl; - - file - << std::setw(prec+n) << stress(0,0) << " " - << std::setw(prec+n) << stress(0,1) << " " - << std::setw(prec+n) << stress(0,2) << " " << std::endl - << std::setw(prec+n) << stress(1,0) << " " - << std::setw(prec+n) << stress(1,1) << " " - << std::setw(prec+n) << stress(1,2) << " " << std::endl - << std::setw(prec+n) << stress(2,0) << " " - << std::setw(prec+n) << stress(2,1) << " " - << std::setw(prec+n) << stress(2,2) << " " << std::endl; - - for (const auto& a : atoms) { - file << std::setw(2) << a.symbol << " " - << std::setw(prec+n) << a.position[0] << " " - << std::setw(prec+n) << a.position[1] << " " - << std::setw(prec+n) << a.position[2] << " " - << std::setw(prec+n) << a.force[0] << " " - << std::setw(prec+n) << a.force[1] << " " - << std::setw(prec+n) << a.force[2] << std::endl; - } - file << std::endl; -} -void Structure::dump_to_file(const std::string& filepath, size_t prec) const { - std::ofstream file(filepath, std::ios::app); // Open in append mode - if (!file.is_open()) { - std::cerr << "Error: Could not open file for writing: " << filepath << std::endl; - return; - } - dump_to_file(file,prec); - file.close(); -} -std::set<Element> Structure::get_unique_elements() const{ - std::set<Element> unique_elements; - for (const auto& a:atoms) unique_elements.insert(a); - return unique_elements; -} diff --git a/src/structure_db.cpp b/src/structure_db.cpp deleted file mode 100644 index f1aaa36f17971a77b3c18ab8a31e0653aab409fb..0000000000000000000000000000000000000000 --- a/src/structure_db.cpp +++ /dev/null @@ -1,356 +0,0 @@ -#include "tadah/core/element.h" -#include <tadah/mlip/structure_db.h> -#include <tadah/core/periodic_table.h> -#include <cstdio> -#include <cctype> // For std::isspace - -StructureDB::StructureDB() { - PeriodicTable::initialize(); -} -StructureDB::StructureDB(Config &config) { - PeriodicTable::initialize(); - add(config); -} - -void StructureDB::add(const std::string fn) { - std::ifstream ifs(fn); - if (!ifs.is_open()) { - throw std::runtime_error("DBFILE does not exist: "+fn); - } - parseFile(fn); - while (true) { - structures.push_back(Structure()); - int t = structures.back().read(ifs); - - // did we read structure succesfully? - // if not remove last object from the list - if (t==1) structures.pop_back(); - - if (ifs.eof()) break; - } - ifs.close(); -} -int StructureDB::add(const std::string fn, size_t first, int N) { - std::ifstream ifs(fn); - if (!ifs.is_open()) { - throw std::runtime_error("DBFILE does not exist: "+fn); - } - - // move iterator to the first structure we want to read - for (size_t i=0; i<first; ++i) { - Structure::next_structure(ifs); - } - - int count=0; - while (count++<N) { - structures.push_back(Structure()); - int t = structures.back().read(ifs); - - // did we read structure succesfully? - // if not remove last object from the list - if (t==1) structures.pop_back(); - - if (ifs.eof()) break; - } - ifs.close(); - - return --count; -} - -void StructureDB::add(const Structure &s) { - structures.push_back(s); -} -void StructureDB::add(const StructureDB &stdb) { - for (const auto &s: stdb) add(s); -} - -void StructureDB::remove(size_t i) { - structures.erase(structures.begin()+i); -} - -void StructureDB::add(Config &config) { - for (const std::string &s : config("DBFILE")) { - dbidx.push_back(size()); - add(s); - } - dbidx.push_back(size()); -} - -size_t StructureDB::size() const { - return structures.size(); -} - -size_t StructureDB::size(size_t n) const { - return dbidx[n+1]-dbidx[n]; -} - -Structure &StructureDB::operator()(size_t s) { - return structures[s]; -} - -const Structure &StructureDB::operator()(size_t s) const { - return structures[s]; -} - -Atom &StructureDB::operator()(size_t s, size_t a) { - return structures[s].atoms[a]; -} -size_t StructureDB::calc_natoms() const { - size_t natoms=0; - for (auto struc: structures) natoms += struc.natoms(); - return natoms; -} -size_t StructureDB::calc_natoms(size_t n) const { - size_t start = dbidx[n]; - size_t stop = dbidx[n+1]; - size_t natoms=0; - for (size_t i=start; i<stop; ++i) { - natoms += (*this)(i).natoms(); - } - return natoms; -} -std::set<Element> StructureDB::get_unique_elements() const { - std::set<Element> s; - for (const auto & st: structures) { - std::set<Element> u = st.get_unique_elements(); - s.insert(u.cbegin(),u.cend()); - } - return s; -} -std::set<Element> StructureDB::find_unique_elements(const std::string &fn) { - std::set<Element> s; - std::ifstream ifs(fn); - std::string line; - - if (!ifs.is_open()) { - std::cerr << "Could not open the file." << std::endl; - } - - char symbol[3]; - while (std::getline(ifs, line)) { - // the second line could be energy or - // a scalling factors eweight fweight sweight - std::getline(ifs,line); - std::stringstream stream(line); - size_t count = std::distance(std::istream_iterator<std::string>(stream), - std::istream_iterator<std::string>()); - - if (count == 3) - std::getline(ifs,line); - - for (size_t i=0; i<6; ++i) - std::getline(ifs,line); - - while (std::getline(ifs, line)) { - if(line.empty()) break; - sscanf(line.c_str(), "%2s", symbol); - s.insert(PeriodicTable::find_by_symbol(symbol)); - } - - } - return s; -} -std::set<Element> StructureDB::find_unique_elements(const Config &c) { - std::set<Element> s; - for (const auto& fn: c("DBFILE")) { - std::set<Element> temp = find_unique_elements(fn); - s.insert(temp.begin(), temp.end()); - } - return s; -} - -template <typename T,typename U> -std::pair<T,U> operator+(const std::pair<T,U> &l,const std::pair<T,U> &r) { - return {l.first+r.first,l.second+r.second}; -} -std::pair<int,int> StructureDB::count(const Config &config) { - std::pair<int, int> res=std::make_pair(0,0); - for (const std::string &fn : config("DBFILE")) { - res = res + count(fn); - } - return res; -} - -std::pair<int,int> StructureDB::count(const std::string fn){ - int nstruc=0; - int natoms_tot=0; - - std::ifstream ifs(fn); - if (!ifs.is_open()) { - throw std::runtime_error("DBFILE does not exist: "+fn); - } - int natoms = Structure::next_structure(ifs); - while(natoms) { - natoms_tot += natoms; - natoms = Structure::next_structure(ifs); - nstruc++; - } - - std::pair<int,int> res = std::make_pair(nstruc,natoms_tot); - return res; -} -void StructureDB::clear_nn() { - for (auto &struc: structures) struc.clear_nn(); -} -void StructureDB::check_atoms_key(Config &config, std::set<Element> &unique_elements) { - bool error=false; - - if (config.exist("ATOMS")) { - // user set this key so here we check does it correspond to unique_elements - if (unique_elements.size()!=config.size("ATOMS")) { - error=true; - } - - auto set_it = unique_elements.begin(); - auto atoms_it = config("ATOMS").begin(); - while (set_it != unique_elements.end() && atoms_it != config("ATOMS").end()) { - if (set_it->symbol != (*atoms_it)) { - error=true; - } - ++set_it; - ++atoms_it; - } - if (error) { - throw std::runtime_error("\n" - "Mismatch between elements in datasets and ATOMS in the config file.\n" - "Please either update the ATOMS in the config file or remove ATOMS\n" - "key completely. Tadah! will automatically configure this key.\n" - ); - } - } else { - for (const auto &s : unique_elements) config.add("ATOMS", s.symbol); - } -} -void StructureDB::check_watoms_key(Config &config, std::set<Element> &unique_elements) { - bool error=false; - - if (config.exist("WATOMS")) { - // user set this key so here we check does it correspond to unique_elements - if (unique_elements.size()!=config.size("WATOMS")) { - error=true; - } - if (error) { - throw std::runtime_error("\n" - "Mismatch between elements in datasets and WATOMS in the config file.\n" - "Please either update the WATOMS in the config file or remove WATOMS\n" - "key completely. In the latter case Tadah! will use default values.\n" - ); - } - } else { - for (const auto &s : unique_elements) config.add("WATOMS", s.Z); - } -} - -std::vector<Structure>::iterator StructureDB::begin() { - return structures.begin(); -} - -std::vector<Structure>::iterator StructureDB::end() { - return structures.end(); -} - -std::vector<Structure>::const_iterator StructureDB::begin() const { - return structures.cbegin(); -} - -std::vector<Structure>::const_iterator StructureDB::end() const { - return structures.cend(); -} -void StructureDB::dump_to_file(const std::string& filepath, size_t prec) const { - std::ofstream file(filepath, std::ios::app); // Open in append mode - if (!file.is_open()) { - std::cerr << "Error: Could not open file for writing: " << filepath << std::endl; - return; - } - for (const auto &s : structures) { - s.dump_to_file(file,prec); - } - file.close(); -} - -std::string StructureDB::summary() const { - std::string str = "# of structures : " + to_string(structures.size()); - - str += " | # of atoms : " + to_string(calc_natoms()); - - str += " | Elements : "; - std::set<Element> ue = get_unique_elements(); - for (const auto &e: ue) str+= e.symbol + " "; - str+="\n"; - return str; -} -void StructureDB::parseFile(const std::string& filename) -{ - std::ifstream fin(filename, std::ios::in | std::ios::binary); - if (!fin.is_open()) - { - std::cerr << "Error: could not open file " << filename << "\n"; - return; - } - - std::size_t header_size = 9; - - // Increase buffer size to speed up I/O on large files. - static const size_t BUFSIZE = 100ULL << 20; // 100 MiB - char* buffer = new char[BUFSIZE]; - fin.rdbuf()->pubsetbuf(buffer, BUFSIZE); - - std::vector<size_t> blockLineCounts; - blockLineCounts.reserve(10000); // Pre-allocate to reduce repeated allocations - - size_t currentBlockCount = 0; - std::string line; - - while (true) - { - if (!std::getline(fin, line)) - { - // End of file or read error - break; - } - - if (isBlankLine(line)) - { - // We reached the end of the current block - if (currentBlockCount > 0) - { - blockLineCounts.push_back(currentBlockCount-header_size); - currentBlockCount = 0; - } - } - else - { - // Non-empty line => belongs to the current block - currentBlockCount++; - } - } - - // If the last block didn’t end with a blank line, close it out - if (currentBlockCount > 0) - { - blockLineCounts.push_back(currentBlockCount-header_size); - } - - fin.close(); - delete[] buffer; - - // Print the results - std::cout << "Found " << blockLineCounts.size() << " blocks.\n"; - for (size_t i = 0; i < blockLineCounts.size(); i+=1000) - { - std::cout << "Block " << i << " has " - << blockLineCounts[i] << " atoms\n"; - } -} - -bool StructureDB::isBlankLine(const std::string& line) const -{ - for (char c : line) - { - if (!std::isspace(static_cast<unsigned char>(c))) - { - return false; - } - } - return true; -} diff --git a/src/vasp_outcar_reader.cpp b/src/vasp_outcar_reader.cpp index d1ead35e7ecb00f1c860d7559574f78e475af0e9..101b77a49dde7bb67fb1a3540021e21e4ca989d6 100644 --- a/src/vasp_outcar_reader.cpp +++ b/src/vasp_outcar_reader.cpp @@ -7,10 +7,16 @@ #include <iostream> #include <stdexcept> -VaspOutcarReader::VaspOutcarReader(StructureDB& db) : DatasetReader(db) {} +namespace tadah { +namespace mlip { -VaspOutcarReader::VaspOutcarReader(StructureDB& db, const std::string& filename) -: DatasetReader(db, filename), filename_(filename) { +VaspOutcarReader::VaspOutcarReader() : DatasetReader(db) {} + +VaspOutcarReader::VaspOutcarReader(StructureDB* db, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile) +: DatasetReader(db, filename, fileIndex, nStructsInFile) { read_data(filename); } @@ -52,7 +58,7 @@ void VaspOutcarReader::parse_data() { std::string type = line.substr(line.find("=") + 1); type = type.substr(0, type.find(":")); atom_types.push_back(type); - s.label += filename_ + " "; + s.label += filename + " "; } else if (line.find("NIONS") != std::string::npos) { @@ -189,15 +195,16 @@ void VaspOutcarReader::parse_data() { } if (complete_structure) { - stdb.add(s); + // stdb.add(s); complete_structure = false; - s = Structure(); + s = &(*stdb)(fileIndex_, ++structIdxInFile); + // s = Structure(); } } if (!stress_tensor_bool) { - for (auto &s : stdb) s.stress.set_zero(); + for (auto &s : *stdb) s.stress.set_zero(); } } @@ -206,5 +213,51 @@ void VaspOutcarReader::print_summary() const { } std::string VaspOutcarReader::get_summary() const { - return stdb.summary(); + return stdb->summary(); +} + +void VaspOutcarReader::pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) +{ + std::ifstream fin(filename); + if (!fin.is_open()) { + throw std::runtime_error("pass1Count: Could not open file " + filename); + } + + nStructures = 0; + atomsPerStructure.clear(); + + std::string line; + // We'll track the last known "Total number of ions in cell". + // Each time we see "Fractional coordinates of atoms", we assume + // that indicates a *new* Structure with lastNatoms atoms. + std::size_t lastNatoms = 0; + + while (std::getline(fin, line)) + { + // Example: parse "Total number of ions in cell" to get natoms + if (line.find("Total number of ions in cell") != std::string::npos) + { + // Very simplistic parse: the 8th token is the # of atoms. + std::istringstream iss(line); + std::string tmp; + int count = 0; + while (iss >> tmp) { + if (++count == 7) { + iss >> lastNatoms; + } + } + } + // Example: each time we see "Fractional coordinates of atoms", + // we treat that as the start of a new structure. + else if (line.find("Fractional coordinates of atoms") != std::string::npos) + { + nStructures++; + atomsPerStructure.push_back(lastNatoms); + } + } + fin.close(); +} +} } diff --git a/src/vasp_poscar_writer.cpp b/src/vasp_poscar_writer.cpp index 33144181c9849ec4779e063c081dd39e8c3592f9..42717afb167350b87815c43e2272d2f6521195d7 100644 --- a/src/vasp_poscar_writer.cpp +++ b/src/vasp_poscar_writer.cpp @@ -4,12 +4,15 @@ #include <fstream> -VaspPoscarWriter::VaspPoscarWriter(StructureDB& db) : DatasetWriter(db) {} +namespace tadah { +namespace mlip { -void VaspPoscarWriter::write_data(const std::string& filename, const size_t i) { +VaspPoscarWriter::VaspPoscarWriter() : DatasetWriter() {} - if (i >= stdb.size()) { - throw std::out_of_range("Index i is out of range."); +void VaspPoscarWriter::write_data(const StructureDB& stdb, const std::string& filename, size_t structIndex) { + + if (structIndex >= stdb->size()) { + throw std::out_of_range("Index structIndex is out of range."); } std::ofstream file(filename); @@ -17,7 +20,7 @@ void VaspPoscarWriter::write_data(const std::string& filename, const size_t i) { throw std::runtime_error("Could not open the file: " + filename); } - const Structure &st = stdb(i); + const Structure &st = stdb(structIndex); // write scaling factor file << st.label << std::endl; @@ -77,3 +80,5 @@ void VaspPoscarWriter::write_data(const std::string& filename, const size_t i) { file.close(); } +} // namespace mlip +} // namespace tadah diff --git a/src/vasp_vasprun_reader.cpp b/src/vasp_vasprun_reader.cpp index c6917d31174be17004b1f9e8be6c6ef6c14a04d6..b8abda8937e02385994ad81fa5f267b880dfbaf2 100644 --- a/src/vasp_vasprun_reader.cpp +++ b/src/vasp_vasprun_reader.cpp @@ -1,10 +1,16 @@ #include <tadah/mlip/dataset_readers/vasp_vasprun_reader.h> -VaspVasprunReader::VaspVasprunReader(StructureDB& stdb) -: DatasetReader(stdb), stdb(stdb) {} +namespace tadah { +namespace mlip { -VaspVasprunReader::VaspVasprunReader(StructureDB& stdb, const std::string& filename) -: DatasetReader(stdb, filename), stdb(stdb) { +VaspVasprunReader::VaspVasprunReader() +: DatasetReader() {} + +VaspVasprunReader::VaspVasprunReader(StructureDB* stdb, + const std::string& filename, + std::size_t fileIndex, + std::size_t nStructsInFile) +: DatasetReader(stdb, filename, fileIndex, nStructsInFile) { read_data(filename); } @@ -106,8 +112,9 @@ void VaspVasprunReader::extract_calculations(rx::xml_node<> *root_node) { extract_forces(calculation_node); _s.label += "Structure " + std::to_string(++counter); - stdb.add(_s); - _s = Structure(); // reset + // stdb.add(_s); + // _s = Structure(); // reset + _s = &(*stdb)(fileIndex_, ++structIdxInFile); } } @@ -238,5 +245,51 @@ void VaspVasprunReader::print_summary() const { } std::string VaspVasprunReader::get_summary() const { - return stdb.summary(); + return stdb->summary(); +} + +void VaspVasprunReader::pass1Count(const std::string &filename, + std::size_t &nStructures, + std::vector<std::size_t> &atomsPerStructure) +{ + std::ifstream fin(filename); + if (!fin.is_open()) { + throw std::runtime_error("pass1Count: Could not open file " + filename); + } + + nStructures = 0; + atomsPerStructure.clear(); + + std::string line; + // We'll track the last known "Total number of ions in cell". + // Each time we see "Fractional coordinates of atoms", we assume + // that indicates a *new* Structure with lastNatoms atoms. + std::size_t lastNatoms = 0; + + while (std::getline(fin, line)) + { + // Example: parse "Total number of ions in cell" to get natoms + if (line.find("Total number of ions in cell") != std::string::npos) + { + // Very simplistic parse: the 8th token is the # of atoms. + std::istringstream iss(line); + std::string tmp; + int count = 0; + while (iss >> tmp) { + if (++count == 7) { + iss >> lastNatoms; + } + } + } + // Example: each time we see "Fractional coordinates of atoms", + // we treat that as the start of a new structure. + else if (line.find("Fractional coordinates of atoms") != std::string::npos) + { + nStructures++; + atomsPerStructure.push_back(lastNatoms); + } + } + fin.close(); +} +} }