Skip to content
Snippets Groups Projects
Commit 495106bb authored by mkirsz's avatar mkirsz
Browse files

Working CASTEP .md reader

parent 789d4d36
No related branches found
No related tags found
1 merge request!7Develop
Pipeline #48108 passed
#ifndef CASTEP_MD_READER_H
#define CASTEP_MD_READER_H
#include <tadah/mlip/structure.h>
#include <tadah/mlip/structure_db.h>
#include <tadah/mlip/dataset_readers/dataset_reader.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <stdexcept>
/**
* @class CastepMDReader
* @brief A class for reading and parsing CASTEP .md files.
*
* Implements data extraction and processing for molecular dynamics
* simulations by converting data from atomic units to common units:
* eV for energy, Ångström for distance, eV/Å for force, and eV/ų
* for pressure.
*
* Example usage:
* @code
* StructureDB my_db;
* // Using the basic constructor
* CastepMDReader reader1(my_db);
* reader1.read_data("test.md");
* reader1.parse_data();
* reader1.print_summary();
*
* // Using the constructor with filename
* CastepMDReader reader2(my_db, "test.md");
* reader2.print_summary();
* @endcode
*/
class CastepMDReader : public DatasetReader {
public:
/**
* @brief Constructs a CastepMDReader with a StructureDB reference.
* @param db Reference to a StructureDB object for storing parsed data.
*/
CastepMDReader(StructureDB& db);
/**
* @brief Constructs a CastepMDReader and reads the specified file.
* @param db Reference to a StructureDB object for storing parsed data.
* @param filename Name of the .md file to read.
*/
CastepMDReader(StructureDB& db, const std::string& filename);
/**
* @brief Reads data from a specified .md file.
* @param filename The name of the .md file to read data from.
*/
void read_data(const std::string& filename) override;
/**
* @brief Parses the data read from the .md file.
*/
void parse_data() override;
/**
* @brief Prints a summary of the parsed .md data.
*/
void print_summary() const override;
private:
std::string raw_data_; /**< Stores raw file data */
std::string filename_; /**< Filename of the .md file */
// Unit conversion factors
double e_conv = 27.211386245988; /**< Conversion factor from Hartree to eV */
double d_conv = 0.529177210903; /**< Conversion factor from Bohr to Ångström */
double f_conv = e_conv / d_conv; /**< Conversion factor from Hartree/Bohr to eV/Å */
double s_conv = e_conv / (d_conv * d_conv * d_conv); /**< Conversion factor from Hartree/Bohr³ to eV/ų */
double k_b = 8.617333262145e-5 / e_conv; /**< Boltzmann constant in atomic units (Hartree/K) */
// Helper methods
/**
* @brief Checks if a string ends with a given suffix.
* @param str The string to check.
* @param suffix The suffix to check for.
* @return True if str ends with suffix, otherwise false.
*/
bool ends_with(const std::string& str, const std::string& suffix);
/**
* @brief Calculates the ideal gas pressure.
* @param s Reference to a Structure object.
* @param T Temperature in atomic units.
* @return Calculated ideal gas pressure.
*/
double calc_P_ideal(Structure &s, double T);
/**
* @brief Performs post-processing on the given structure.
* @param s Reference to a Structure object.
*/
void postproc_structure(Structure &s);
double T = 0; /**< Temperature in atomic units (Hartree/k_B) */
bool stress_tensor_bool = false; /**< Indicates presence of a stress tensor */
};
#endif // CASTEP_MD_READER_H
#include <tadah/core/utils.h>
#include <tadah/mlip/dataset_readers/castep_md_reader.h>
CastepMDReader::CastepMDReader(StructureDB& db)
: DatasetReader(db) {}
CastepMDReader::CastepMDReader(StructureDB& db, const std::string& filename)
: DatasetReader(db, filename) {
read_data(filename);
parse_data();
}
void CastepMDReader::read_data(const std::string& filename) {
filename_ = filename;
std::ifstream file(filename);
if (!file.is_open()) {
throw std::runtime_error("Could not open the file: " + filename);
}
std::string line;
while (std::getline(file, line)) {
raw_data_ += line + "\n";
}
file.close();
}
bool CastepMDReader::ends_with(const std::string& str, const std::string& suffix) {
return str.size() >= suffix.size() &&
str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
}
double CastepMDReader::calc_P_ideal(Structure &s, double T) {
double V = s.get_volume(); // Bohr^3
int N = s.natoms();
return N*k_b*T/V;
}
void CastepMDReader::postproc_structure(Structure &s) {
if (!stress_tensor_bool) {
s.stress.set_zero();
} else {
// stress in .md file contains kinetic contributions
// so we remove it to obtain pure virial stress tensor
// P = N k_b T / V
s.stress -= calc_P_ideal(s,T);
}
// finish conversion
s.cell *= d_conv;
s.stress *= s_conv;
// add to database
stdb.add(s);
// reset
stress_tensor_bool = false;
s = Structure();
}
void CastepMDReader::parse_data() {
std::istringstream stream(raw_data_);
std::string line;
Structure s;
size_t cell_idx=0;
size_t stress_idx=0;
size_t force_idx=0;
// skip header if exists also deal with the case
// where there is no blank line at the begining of the .md file
std::string time;
while (std::getline(stream, line)) {
std::istringstream iss(line);
if (ends_with(line,"<-- E")) {
s.label = "Filename: "+filename_+ " | Units: (eV, Angstrom) | Time: " + time;
iss >> s.energy;
s.energy *= e_conv;
break;
}
iss >> time;
}
while (std::getline(stream, line)) {
if (ends_with(line,"<-- T")) {
std::istringstream iss(line);
iss >> T;
T /= k_b;
}
else if (ends_with(line,"<-- E")) {
std::istringstream iss(line);
iss >> s.energy;
s.energy *= e_conv;
}
else if (ends_with(line,"<-- h")) {
// the current matrix of cell vectors
std::istringstream iss(line);
if (!(iss >> s.cell(0,cell_idx) >> s.cell(1,cell_idx) >> s.cell(2,cell_idx)))
std::cerr << "Warning: Unexpected end of data when reading lattice vectors at row " << cell_idx << std::endl;
cell_idx++;
}
else if (ends_with(line,"<-- S")) {
// The full pressure tensor (including kinetic contributions)
std::istringstream iss(line);
if (!(iss >> s.stress(0,stress_idx) >> s.stress(1,stress_idx) >> s.stress(2,stress_idx)))
std::cerr << "Warning: Unexpected end of data when reading stress tensor at row " << stress_idx << std::endl;
stress_idx++;
stress_tensor_bool = true;
}
else if (ends_with(line,"<-- R")) {
// First the position vectors of all ions are printed with the label <-- R.
std::string element;
int temp;
double px, py, pz;
std::istringstream iss(line);
if (!(iss >> element >> temp >> px >> py >> pz))
std::cerr << "Warning: Unexpected end of data when reading atomic positions:\n" << line << std::endl;
s.add_atom(Atom(Element(element),px*d_conv,py*d_conv,pz*d_conv,0,0,0));
}
else if (ends_with(line,"<-- F")) {
std::string element;
int temp;
double fx, fy, fz;
std::istringstream iss(line);
if (!(iss >> element >> temp >> fx >> fy >> fz))
std::cerr << "Warning: Unexpected end of data when reading atomic forces:\n" << line << std::endl;
s.atoms[force_idx].force[0]=fx*f_conv;
s.atoms[force_idx].force[1]=fy*f_conv;
s.atoms[force_idx].force[2]=fz*f_conv;
force_idx++;
}
else if (is_blank_line(line)) {
postproc_structure(s);
if (std::getline(stream, line)) {
std::string time;
std::istringstream iss(line);
iss >> time;
s.label = "Time: " + time;
}
cell_idx=0;
stress_idx=0;
force_idx=0;
}
}
// in case there is no blank line at the end we have to add last strcuture here
if (s.natoms()) {
postproc_structure(s);
}
}
void CastepMDReader::print_summary() const {
std::cout << stdb;
}
...@@ -88,7 +88,12 @@ void VasprunReader::extract_calculations(rx::xml_node<> *root_node) { ...@@ -88,7 +88,12 @@ void VasprunReader::extract_calculations(rx::xml_node<> *root_node) {
calculation_node; calculation_node = calculation_node->next_sibling("calculation")) { calculation_node; calculation_node = calculation_node->next_sibling("calculation")) {
extract_total_energy(calculation_node); extract_total_energy(calculation_node);
extract_stress_tensor(calculation_node); extract_stress_tensor(calculation_node);
if (!stress_tensor_bool) {
_s.stress.set_zero();
}
stress_tensor_bool = false;
auto structure_node = calculation_node->first_node("structure"); auto structure_node = calculation_node->first_node("structure");
while (structure_node) { while (structure_node) {
...@@ -101,12 +106,6 @@ void VasprunReader::extract_calculations(rx::xml_node<> *root_node) { ...@@ -101,12 +106,6 @@ void VasprunReader::extract_calculations(rx::xml_node<> *root_node) {
stdb.add(_s); stdb.add(_s);
_s = Structure(); // reset _s = Structure(); // reset
} }
if (!stress_tensor_bool) {
for (auto& s : stdb) {
s.stress.set_zero();
}
}
} }
void VasprunReader::extract_total_energy(rx::xml_node<> *calculation_node) { void VasprunReader::extract_total_energy(rx::xml_node<> *calculation_node) {
......
#include "catch2/catch.hpp" #include "catch2/catch.hpp"
#include "tadah/mlip/dataset_readers/castep_md_reader.h"
#include <tadah/mlip/structure_db.h> #include <tadah/mlip/structure_db.h>
#include <tadah/mlip/dataset_readers/outcar_reader.h> #include <tadah/mlip/dataset_readers/outcar_reader.h>
#include <tadah/mlip/dataset_readers/vasprun_reader.h> #include <tadah/mlip/dataset_readers/vasprun_reader.h>
...@@ -23,9 +24,11 @@ TEST_CASE("Dataset Readers process datasets in directories", "[DatasetReaders]") ...@@ -23,9 +24,11 @@ TEST_CASE("Dataset Readers process datasets in directories", "[DatasetReaders]")
std::string valid_outcar_dir = "./tests_data/valid_outcars"; std::string valid_outcar_dir = "./tests_data/valid_outcars";
std::string valid_vasprun_dir = "./tests_data/valid_vaspruns"; std::string valid_vasprun_dir = "./tests_data/valid_vaspruns";
std::string valid_castep_md_dir = "./tests_data/valid_castep_md";
std::vector<std::string> valid_outcar_files = get_all_files(valid_outcar_dir); std::vector<std::string> valid_outcar_files = get_all_files(valid_outcar_dir);
std::vector<std::string> valid_vasprun_files = get_all_files(valid_vasprun_dir); std::vector<std::string> valid_vasprun_files = get_all_files(valid_vasprun_dir);
std::vector<std::string> valid_castep_md_files = get_all_files(valid_castep_md_dir);
SECTION("Valid OUTCAR datasets - Constructor 1") { SECTION("Valid OUTCAR datasets - Constructor 1") {
for (const auto& filename : valid_outcar_files) { for (const auto& filename : valid_outcar_files) {
...@@ -73,4 +76,27 @@ TEST_CASE("Dataset Readers process datasets in directories", "[DatasetReaders]") ...@@ -73,4 +76,27 @@ TEST_CASE("Dataset Readers process datasets in directories", "[DatasetReaders]")
// Additional checks to confirm data validity // Additional checks to confirm data validity
} }
} }
SECTION("Valid CASTEP .md datasets - Constructor 1") {
for (const auto& filename : valid_castep_md_files) {
StructureDB db;
REQUIRE_NOTHROW(CastepMDReader(db));
CastepMDReader reader(db);
REQUIRE_NOTHROW(reader.read_data(filename));
REQUIRE_NOTHROW(reader.parse_data());
REQUIRE_NOTHROW(reader.print_summary());
// Additional checks to confirm data validity
}
}
SECTION("Valid CASTEP .md datasets - Constructor 2") {
for (const auto& filename : valid_castep_md_files) {
StructureDB db;
REQUIRE_NOTHROW(CastepMDReader(db, filename));
CastepMDReader reader(db, filename);
REQUIRE_NOTHROW(reader.print_summary());
// Additional checks to confirm data validity
}
}
} }
BEGIN header
END header
0.0000000000000000E+000
-3.1438056609022318E+001 -3.1376043990507860E+001 2.5277616819407205E-002 <-- E
2.1064680682839339E-003 <-- T
1.4675541909861883E-004 <-- P
1.0261212863294524E+001 0.0000000000000000E+000 0.0000000000000000E+000 <-- h
0.0000000000000000E+000 1.0261212863294524E+001 0.0000000000000000E+000 <-- h
0.0000000000000000E+000 0.0000000000000000E+000 1.0261212863294524E+001 <-- h
0.0000000000000000E+000 0.0000000000000000E+000 0.0000000000000000E+000 <-- hv
0.0000000000000000E+000 0.0000000000000000E+000 0.0000000000000000E+000 <-- hv
0.0000000000000000E+000 0.0000000000000000E+000 0.0000000000000000E+000 <-- hv
5.3947667434339923E-005 3.5567533768348789E-005 8.1607367238056459E-005 <-- S
3.5567533768348789E-005 -2.2263123614334223E-004 1.5275407397741958E-004 <-- S
8.1607367238056459E-005 1.5275407397741958E-004 -2.7158268858685418E-004 <-- S
Si 1 0.0000000000000000E+000 0.0000000000000000E+000 0.0000000000000000E+000 <-- R
Si 2 0.0000000000000000E+000 5.1306064316472622E+000 5.2845246245966804E+000 <-- R
Si 3 5.1306064316472622E+000 0.0000000000000000E+000 5.2332185602802079E+000 <-- R
Si 4 5.1306064316472622E+000 5.1306064316472622E+000 0.0000000000000000E+000 <-- R
Si 5 7.7985217761038399E+000 2.5653032158236311E+000 7.6959096474708941E+000 <-- R
Si 6 2.4626910871906857E+000 2.5653032158236311E+000 2.5653032158236311E+000 <-- R
Si 7 2.5653032158236311E+000 7.7985217761038399E+000 7.6959096474708941E+000 <-- R
Si 8 7.6959096474708941E+000 7.5932975188379492E+000 2.5653032158236311E+000 <-- R
Si 1 1.6461326428344710E-004 4.5089543337825654E-005 1.2682182569746878E-004 <-- V
Si 2 8.4803099578915976E-005 1.4834634371448861E-004 3.0659125052563604E-004 <-- V
Si 3 2.6822431940035069E-004 1.0992456024232732E-004 8.0842303031580565E-005 <-- V
Si 4 1.2525899091439435E-004 3.4012614699403934E-004 7.2047643415800615E-005 <-- V
Si 5 2.5769632562329018E-004 1.4356687589002942E-004 1.2440786533203562E-004 <-- V
Si 6 2.3202268466799414E-004 2.2763815374104187E-004 1.6976252834642846E-004 <-- V
Si 7 2.6812969910198966E-004 3.2072761452135387E-004 1.5759444939777658E-004 <-- V
Si 8 1.0324171598127208E-004 1.2993469638267985E-004 3.5752302590864274E-004 <-- V
Si 1 -4.7485150541330739E-003 -6.4256923548756568E-003 6.2722754436370046E-003 <-- F
Si 2 7.3746443515239476E-003 6.2703809926870444E-003 -1.9171200407951994E-002 <-- F
Si 3 6.0962550453277685E-003 6.6102141678239610E-003 -2.2571480684108403E-002 <-- F
Si 4 -5.2031504260241909E-003 -4.5801487835229818E-003 -4.7066214646812685E-003 <-- F
Si 5 -1.6513435889254458E-002 -1.7331856195184308E-003 1.0707424806046284E-002 <-- F
Si 6 1.1356941198022526E-002 1.1181594167513791E-003 9.8361446023216993E-003 <-- F
Si 7 3.0006418651752917E-003 -1.1410274127633430E-002 1.0488620245673706E-002 <-- F
Si 8 -1.3633810906378125E-003 1.0150546308288114E-002 9.1448374590629693E-003 <-- F
8.2682746688379041E+001
-3.1439975473797670E+001 -3.1374678859949793E+001 2.4052714949383245E-002 <-- E
2.0043929124486039E-003 <-- T
8.7348209776674999E-005 <-- P
1.0324059500983507E+001 0.0000000000000000E+000 0.0000000000000000E+000 <-- h
0.0000000000000000E+000 1.0324059500983507E+001 0.0000000000000000E+000 <-- h
0.0000000000000000E+000 0.0000000000000000E+000 1.0324059500983507E+001 <-- h
1.1795963543494103E-004 0.0000000000000000E+000 0.0000000000000000E+000 <-- hv
0.0000000000000000E+000 1.1795963543494103E-004 0.0000000000000000E+000 <-- hv
0.0000000000000000E+000 0.0000000000000000E+000 1.1795963543494103E-004 <-- hv
1.0458765776711853E-004 3.9720621748951958E-005 8.1930160177035762E-005 <-- S
3.9720621748951958E-005 -1.6019777020622887E-004 1.5037016694847971E-004 <-- S
8.1930160177035762E-005 1.5037016694847971E-004 -2.0643451689091470E-004 <-- S
Si 1 1.2744294941511641E-002 3.1475895044957244E-003 1.0483557256432526E-002 <-- R
Si 2 7.2231355375472414E-003 5.1742211831890303E+000 5.3399352052581461E+000 <-- R
Si 3 5.1837220163970112E+000 9.1653662268827915E-003 5.2701736763925622E+000 <-- R
Si 4 5.1716207784861759E+000 5.1887125276353441E+000 5.4018817715452746E-003 <-- R
Si 5 7.8656278905030277E+000 2.5922910465500344E+000 7.7536336562302450E+000 <-- R
Si 6 2.4969462041692831E+000 2.5991531829393630E+000 2.5951445151261545E+000 <-- R
Si 7 2.6024923178572479E+000 7.8709712781653494E+000 7.7562524072486578E+000 <-- R
Si 8 7.7511457020375323E+000 7.6507943289515987E+000 2.6099972908325002E+000 <-- R
Si 1 1.5331190519307433E-004 3.4894634394765298E-005 1.3547550516623848E-004 <-- V
Si 2 9.6392864821422681E-005 1.5557205280474068E-004 2.6755621083043906E-004 <-- V
Si 3 2.7141915149831943E-004 1.1945814360504374E-004 4.2948865693523264E-005 <-- V
Si 4 1.1490162732095732E-004 3.2405725428895436E-004 6.3530559628624742E-005 <-- V
Si 5 2.2380439611879922E-004 1.3708831797473156E-004 1.4010649582267966E-004 <-- V
Si 6 2.4429731556525181E-004 2.2404164931934104E-004 1.8111056684752262E-004 <-- V
Si 7 2.6625495463489766E-004 2.9407254229973787E-004 1.7037584469949238E-004 <-- V
Si 8 9.9377449639603458E-005 1.4281825472689115E-004 3.6272354679770376E-004 <-- V
Si 1 -4.4114263236281984E-003 -4.6910230783696197E-003 7.7197180272582178E-003 <-- F
Si 2 9.0177288696785003E-003 6.5842250734736750E-003 -1.9696943205593471E-002 <-- F
Si 3 5.1791328140053435E-003 7.9823457142640351E-003 -2.1142145815895456E-002 <-- F
Si 4 -3.8982262065240855E-003 -5.5169132982109620E-003 -3.6318679599861700E-003 <-- F
Si 5 -1.7468137533968822E-002 -2.1600573057214351E-003 1.1759717884650078E-002 <-- F
Si 6 9.9283945440579811E-003 8.0297166277106783E-004 8.6076342461942242E-003 <-- F
Si 7 2.1254457719268472E-003 -1.2057481153662996E-002 9.3572717224040154E-003 <-- F
Si 8 -4.7291193554756771E-004 9.0559323854562356E-003 7.0266151009685572E-003 <-- F
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment