diff --git a/include/tadah/mlip/dataset_readers/dataset_reader_selector.h b/include/tadah/mlip/dataset_readers/dataset_reader_selector.h new file mode 100644 index 0000000000000000000000000000000000000000..8976599bb5b7c4041efa31ee6bb2293ad5fd9d29 --- /dev/null +++ b/include/tadah/mlip/dataset_readers/dataset_reader_selector.h @@ -0,0 +1,33 @@ +#ifndef DATASET_READER_SELECTOR_H +#define DATASET_READER_SELECTOR_H + +#include <tadah/mlip/structure_db.h> +#include <tadah/mlip/dataset_readers/dataset_reader.h> +#include <string> +#include <memory> + +/** + * @class DatasetReaderSelector + * @brief Selects the appropriate DatasetReader based on file type. + */ +class DatasetReaderSelector { +public: + /** + * @brief Factory method to create specific DatasetReader objects. + * + * @param type The type of the dataset reader to create. + * @param db Reference to a StructureDB object to store parsed data. + * @return A unique pointer to a DatasetReader object. + */ + static std::unique_ptr<DatasetReader> get_reader(const std::string& type, StructureDB& db); + + /** + * @brief Determines the file type based on its content. + * + * @param filepath File path to check the content. + * @return A string representing the detected file type. + */ + static std::string determine_file_type_by_content(const std::string& filepath); +}; + +#endif // DATASET_READER_SELECTOR_H diff --git a/include/tadah/mlip/structure_db.h b/include/tadah/mlip/structure_db.h index 35c8f197c798e2fd38b436958a0d85d8e6d84d31..3f59b1dfd6b1a69ea7dae3ba3a9bd7258a877acf 100644 --- a/include/tadah/mlip/structure_db.h +++ b/include/tadah/mlip/structure_db.h @@ -39,31 +39,31 @@ * */ struct StructureDB { - std::vector<Structure> structures; + std::vector<Structure> structures; - /** Create an empty StructureDB object. */ - StructureDB(); + /** Create an empty StructureDB object. */ + StructureDB(); - /** Create this object and load structures + /** Create this object and load structures * listed in the config file * * \note * Required Config key: \ref DBFILE */ - StructureDB(Config &config); + StructureDB(Config &config); - /** Add structures listed in the config file + /** Add structures listed in the config file * * \note * Required Config key: \ref DBFILE * */ - void add(Config &config); + void add(Config &config); - /** Add all structures from a file */ - void add(const std::string fn); + /** Add all structures from a file */ + void add(const std::string fn); - /** Add N structures from a file begining from first index. + /** Add N structures from a file begining from first index. * * Indexing starts from zero, i.e. * first=0 corresponds to the first structure in the file. @@ -73,24 +73,24 @@ struct StructureDB { * * Return the number of loaded structures. */ - int add(const std::string fn, size_t first, int N); + int add(const std::string fn, size_t first, int N); - /** Add a single Structure object to this container */ - void add(const Structure &s); + /** Add a single Structure object to this container */ + void add(const Structure &s); - /** remove i-th Structure object from this container */ - void remove(size_t i); + /** remove i-th Structure object from this container */ + void remove(size_t i); - /** \return number of structures held by this object */ - size_t size() const; + /** \return number of structures held by this object */ + size_t size() const; - /** \return number of structures in the n-th DBFILE + /** \return number of structures in the n-th DBFILE * * n={0,...,number of DBFILEs-1} */ - size_t size(size_t n) const; + size_t size(size_t n) const; - /** \return reference to the s-th structure + /** \return reference to the s-th structure * * Usage example: * @@ -98,10 +98,10 @@ struct StructureDB { * # and bind it to Structure st. * Structure &st = st(1); */ - Structure& operator()(size_t s); - const Structure& operator()(size_t s) const; + Structure& operator()(size_t s); + const Structure& operator()(size_t s) const; - /** \return reference to the a-th atom in the s-th structure + /** \return reference to the a-th atom in the s-th structure * * Usage example: * @@ -109,58 +109,61 @@ struct StructureDB { * # held by this object and bind it to the atom object. * Atom &atom = st(2,4); */ - Atom& operator()(size_t s, size_t a); + Atom& operator()(size_t s, size_t a); - /** Print this object summary to the stream */ - friend std::ostream& operator<<(std::ostream& os, const StructureDB& stdb) - { - os << "# of structures : " << std::left << stdb.structures.size() << std::endl; - return os; - } + /** Print this object summary to the stream */ + friend std::ostream& operator<<(std::ostream& os, const StructureDB& stdb) + { + os << "# of structures : " << std::left << stdb.structures.size() << std::endl; + return os; + } - /** Store indices for each dataset. + /** Store indices for each dataset. * * e.g. if 3 datasets of sizes 11,13,15 * dbidx=={0,11,24,39} */ - std::vector<size_t> dbidx; + std::vector<size_t> dbidx; - /** Calculate total number of atoms stored by this object. */ - size_t calc_natoms() const; + /** Calculate total number of atoms stored by this object. */ + size_t calc_natoms() const; - /** Calculate total number of atoms in the n-th DBFILE. + /** Calculate total number of atoms in the n-th DBFILE. * * n={0,...,number of DBFILEs-1} */ - size_t calc_natoms(size_t n) const; + size_t calc_natoms(size_t n) const; - /** Return unique elements for all Structures. */ - std::set<Element> get_unique_elements() const; + /** Return unique elements for all Structures. */ + std::set<Element> get_unique_elements() const; - /** Find unique elements in provided Config file */ - static std::set<Element> find_unique_elements(const Config &c); + /** Find unique elements in provided Config file */ + static std::set<Element> find_unique_elements(const Config &c); - /** Find unique elements in provided file */ - static std::set<Element> find_unique_elements(const std::string &fn); + /** Find unique elements in provided file */ + static std::set<Element> find_unique_elements(const std::string &fn); - /** Count number of structures and atoms in all datasets from the Config file. */ - static std::pair<int,int> count(const Config &c); + /** Count number of structures and atoms in all datasets from the Config file. */ + static std::pair<int,int> count(const Config &c); - /** Count number of structures and atoms in a single dataset. */ - static std::pair<int,int> count(const std::string fn); + /** Count number of structures and atoms in a single dataset. */ + static std::pair<int,int> count(const std::string fn); - void clear_nn(); + void clear_nn(); - /** Check consistency of the ATOMS key. */ - static void check_atoms_key(Config &config, std::set<Element> &unique_elements); + /** Check consistency of the ATOMS key. */ + static void check_atoms_key(Config &config, std::set<Element> &unique_elements); - /** Check consistency of the WATOMS key. Add if missing*/ - static void check_watoms_key(Config &config, std::set<Element> &unique_elements); + /** Check consistency of the WATOMS key. Add if missing*/ + static void check_watoms_key(Config &config, std::set<Element> &unique_elements); - // Methods to enable range-based for loop - std::vector<Structure>::iterator begin(); - std::vector<Structure>::iterator end(); - std::vector<Structure>::const_iterator begin() const; - std::vector<Structure>::const_iterator end() const; + // Methods to enable range-based for loop + std::vector<Structure>::iterator begin(); + std::vector<Structure>::iterator end(); + std::vector<Structure>::const_iterator begin() const; + std::vector<Structure>::const_iterator end() const; + + /** Method to dump class content to a file */ + void dump_to_file(const std::string& filepath, size_t prec=12) const; }; #endif diff --git a/src/dataset_reader_selector.cpp b/src/dataset_reader_selector.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b3c13fd9d793760852aed88065b01955caa0d5d2 --- /dev/null +++ b/src/dataset_reader_selector.cpp @@ -0,0 +1,62 @@ +#include <tadah/mlip/dataset_readers/dataset_reader_selector.h> +#include <tadah/mlip/dataset_readers/castep_castep_reader.h> +#include <tadah/mlip/dataset_readers/castep_md_reader.h> +#include <tadah/mlip/dataset_readers/castep_geom_reader.h> +#include <tadah/mlip/dataset_readers/vasp_outcar_reader.h> +#include <tadah/mlip/dataset_readers/vasp_vasprun_reader.h> + +#include <fstream> +#include <iostream> + +// Factory method implementation +std::unique_ptr<DatasetReader> DatasetReaderSelector::get_reader(const std::string& filepath, StructureDB& db) { + + std::string type = determine_file_type_by_content(filepath); + + if (type == "CASTEP.CASTEP") { + return std::make_unique<CastepCastepReader>(db,filepath); + } else if (type == "CASTEP.MD") { + return std::make_unique<CastepMDReader>(db,filepath); + } else if (type == "CASTEP.GEOM") { + return std::make_unique<CastepGeomReader>(db,filepath); + } else if (type == "VASP.VASPRUN") { + return std::make_unique<VaspVasprunReader>(db,filepath); + } else if (type == "VASP.OUTCAR") { + return std::make_unique<VaspOutcarReader>(db,filepath); + } else { + std::cerr << "Unknown type! Returning nullptr." << std::endl; + return nullptr; + } + +} + +// Function to determine the file type based on content +std::string DatasetReaderSelector::determine_file_type_by_content(const std::string& filepath) { + std::ifstream file(filepath); + if (!file.is_open()) { + std::cerr << "Could not open file: " << filepath << std::endl; + return "Unknown file type"; + } + + std::string line; + while (std::getline(file, line)) { + + if (line.find("vasp") != std::string::npos) { + if (line.find("incar:") != std::string::npos || line.find("outcar") != std::string::npos) { + return "VASP.OUTCAR"; + } else if (line.find("<modeling>") != std::string::npos || line.find("<calculation>") != std::string::npos) { + return "VASP.VASPRUN"; + } + } + + if (line.find("<-- c") != std::string::npos) { + return "CASTEP.GEOM"; + } else if (line.find("<-- hv") != std::string::npos) { + return "CASTEP.MD"; + } else if (line.find("Unit Cell") != std::string::npos) { + return "CASTEP.CASTEP"; + } + } + + return "Unknown file type"; +} diff --git a/src/structure_db.cpp b/src/structure_db.cpp index 5966f683c35612380528a0d9f1e1a2835aca9e80..8e15a289d4cbefae7f77a6fb8d28705084902def 100644 --- a/src/structure_db.cpp +++ b/src/structure_db.cpp @@ -248,3 +248,52 @@ std::vector<Structure>::const_iterator StructureDB::begin() const { std::vector<Structure>::const_iterator StructureDB::end() const { return structures.cend(); } +void StructureDB::dump_to_file(const std::string& filepath, size_t prec) const { + std::ofstream file(filepath, std::ios::app); // Open in append mode + if (!file.is_open()) { + std::cerr << "Error: Could not open file for writing: " << filepath << std::endl; + return; + } + const int n = 5; + for (const auto &s : structures) { + file << s.label << std::endl; + file << std::fixed << std::setprecision(prec); + file << s.eweight << " " << s.fweight << " " << s.sweight << std::endl; + file << s.energy << std::endl; + + file + << std::setw(prec+n) << s.cell(0,0) << " " + << std::setw(prec+n) << s.cell(0,1) << " " + << std::setw(prec+n) << s.cell(0,2) << " " << std::endl + << std::setw(prec+n) << s.cell(1,0) << " " + << std::setw(prec+n) << s.cell(1,1) << " " + << std::setw(prec+n) << s.cell(1,2) << " " << std::endl + << std::setw(prec+n) << s.cell(2,0) << " " + << std::setw(prec+n) << s.cell(2,1) << " " + << std::setw(prec+n) << s.cell(2,2) << " " << std::endl; + + file + << std::setw(prec+n) << s.stress(0,0) << " " + << std::setw(prec+n) << s.stress(0,1) << " " + << std::setw(prec+n) << s.stress(0,2) << " " << std::endl + << std::setw(prec+n) << s.stress(1,0) << " " + << std::setw(prec+n) << s.stress(1,1) << " " + << std::setw(prec+n) << s.stress(1,2) << " " << std::endl + << std::setw(prec+n) << s.stress(2,0) << " " + << std::setw(prec+n) << s.stress(2,1) << " " + << std::setw(prec+n) << s.stress(2,2) << " " << std::endl; + + for (const auto& a : s.atoms) { + file << std::setw(2) << a.symbol << " " + << std::setw(prec+n) << a.position[0] << " " + << std::setw(prec+n) << a.position[1] << " " + << std::setw(prec+n) << a.position[2] << " " + << std::setw(prec+n) << a.force[0] << " " + << std::setw(prec+n) << a.force[1] << " " + << std::setw(prec+n) << a.force[2] << std::endl; + } + file << std::endl; + } + + file.close(); +}