Skip to content
Snippets Groups Projects
Commit a1bc6b38 authored by mkirsz's avatar mkirsz
Browse files

CASTEP .castep reader with tests

parent d6582a4c
No related branches found
No related tags found
1 merge request!7Develop
Pipeline #48187 passed
#ifndef CASTEP_CASTEP_READER_H
#define CASTEP_CASTEP_READER_H
#include <tadah/mlip/structure.h>
#include <tadah/mlip/structure_db.h>
#include <tadah/mlip/dataset_readers/dataset_reader.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <stdexcept>
/**
* @class CastepCastepReader
* @brief A class for reading and parsing CASTEP .castep files.
*
* Implements data extraction and processing for molecular dynamics
* simulations (both constant and non-constant volume),
* geometry optimization, and single-point energy calculations.
*
* Supports concatenated .castep files, such as an MD run followed
* by a geometry optimization.
*
* The output units are:
* - eV for energy
* - Ångström for distance
* - eV/Å for force
* - eV/ų for pressure (converted from GPa)
* - ps for time
*
* If there is no stress tensor, it is set to zero.
*
* Example usage:
* @code
* StructureDB my_db;
* // Using the basic constructor
* CastepCastepReader reader1(my_db);
* reader1.read_data("test.castep");
* reader1.parse_data();
* reader1.print_summary();
*
* // Using the constructor with filename
* CastepCastepReader reader2(my_db, "test.castep");
* reader2.print_summary();
* @endcode
*/
class CastepCastepReader : public DatasetReader {
public:
/**
* @brief Constructs a CastepCastepReader with a StructureDB reference.
* @param db Reference to a StructureDB object for storing parsed data.
*/
CastepCastepReader(StructureDB& db);
/**
* @brief Constructs a CastepCastepReader and reads the specified file.
* @param db Reference to a StructureDB object for storing parsed data.
* @param filename Name of the .castep file to read.
*/
CastepCastepReader(StructureDB& db, const std::string& filename);
/**
* @brief Reads data from a specified .castep file.
* @param filename The name of the .castep file to read data from.
*/
virtual void read_data(const std::string& filename) override;
/**
* @brief Parses the data read from the .castep file.
*/
virtual void parse_data() override;
/**
* @brief Prints a summary of the parsed .castep data.
*/
virtual void print_summary() const override;
protected:
std::string raw_data_; /**< Stores raw file data */
std::string filename_; /**< Filename of the .castep file */
// Unit conversion factors
double p_conv = 0.00624150913; /**< Conversion factor from GPa to eV/A^3 */
};
#endif // CASTEP_CASTEP_READER_H
#include "tadah/core/element.h"
#include "tadah/core/utils.h"
#include <string>
#include <tadah/mlip/atom.h>
#include <tadah/mlip/structure.h>
#include <tadah/mlip/structure_db.h>
#include <tadah/mlip/dataset_readers/castep_castep_reader.h>
CastepCastepReader::CastepCastepReader(StructureDB& db) : DatasetReader(db) {}
CastepCastepReader::CastepCastepReader(StructureDB& db, const std::string& filename)
: DatasetReader(db, filename) {
read_data(filename);
parse_data();
}
void CastepCastepReader::read_data(const std::string& filename) {
std::ifstream file(filename);
if (!file.is_open()) {
throw std::runtime_error("Could not open the file: " + filename);
}
std::string line;
while (std::getline(file, line)) {
raw_data_ += line + "\n";
}
file.close();
}
void CastepCastepReader::parse_data() {
std::istringstream stream(raw_data_);
std::string line;
size_t natoms;;
bool stress_tensor_bool = false;
bool constant_volume = false;
bool complete_structure = false;
Structure s;
std::string label;
while (std::getline(stream, line)) {
if (line.find("Unit Cell") != std::string::npos) {
if (!std::getline(stream, line) || !std::getline(stream, line)) {
std::cerr << "Warning: Unexpected end of data when reading atom information" << std::endl;
}
s = Structure();
for (int i = 0; i < 3; ++i) {
if (!std::getline(stream, line)) {
std::cerr << "Warning: Unexpected end of data when reading lattice vectors at row " << i << std::endl;
break;
}
std::istringstream iss(line);
if (!(iss >> s.cell(0,i) >> s.cell(1,i) >> s.cell(2,i))) {
std::cerr << "Warning: Unexpected end of data when reading lattice vectors at row " << i << std::endl;
break;
}
}
if (constant_volume) label.clear();
constant_volume = false;
}
else if (line.find("Total number of ions in cell") != std::string::npos) {
// this line indicates new type of computation, so we reset flags here
std::istringstream iss(line);
std::string temp;
int count = 0;
// Process the line to reach the 8th element
while (iss >> temp) {
if (++count == 7 && !(iss >> natoms)) {
std::cerr << "Error: Failed to read number of atoms from line: " << line << std::endl;
}
}
// Check if the line was too short
if (count < 7) {
std::cerr << "Error: Line is too short to extract total number of ions in a cell. Line: " << line << std::endl;
}
stress_tensor_bool=false;
constant_volume=false;
}
else if (line.find("so fixing cell parameters") != std::string::npos) {
constant_volume=true;
label = "CASTEP MD, const. volume: true, step: 0";
}
else if (line.find("Starting MD iteration") != std::string::npos) {
std::istringstream iss(line);
std::string step;
iss >> step >> step >> step >> step;
std::ostringstream oss;
oss << std::boolalpha << constant_volume;
label = "CASTEP MD, const. volume: " + oss.str() + ", step: " + step;
}
else if (constant_volume && line.find("Cell Contents") != std::string::npos) {
s = Structure();
s.cell = stdb.structures.back().cell; // copy last cell as it is not repeated in castep for const. volume
}
else if (line.find("Fractional coordinates of atoms") != std::string::npos) {
if (!std::getline(stream, line) || !std::getline(stream, line)) {
std::cerr << "Warning: Unexpected end of data when reading atom information" << std::endl;
}
for (size_t i = 0; i < natoms; ++i) {
if (!std::getline(stream, line)) {
std::cerr << "Warning: Unexpected end of data when reading atomic coordinates at row " << i << std::endl;
break;
}
std::istringstream iss(line);
std::string type, tmp;
double px,py,pz;
if (!(iss >> tmp >> type >> tmp >> px >> py >> pz)) {
std::cerr << "Warning: Unexpected end of data when reading atomic coordiantes at row " << i << std::endl;
break;
}
s.add_atom(Atom(Element(type),px,py,pz,0,0,0));
s.atoms[i].position = s.cell * s.atoms[i].position; // convert to abs
}
}
else if (line.find("Cartesian components (eV/A)") != std::string::npos) {
if (!std::getline(stream, line) || !std::getline(stream, line) || !std::getline(stream, line)) {
std::cerr << "Warning: Unexpected end of data when reading atom forces" << std::endl;
}
for (size_t i = 0; i < natoms; ++i) {
if (!std::getline(stream, line)) {
std::cerr << "Warning: Unexpected end of data when reading atom forces: " << line << std::endl;
break;
}
std::istringstream iss(line);
std::string tmp;
double fx,fy,fz;
if (!(iss >> tmp >> tmp >> tmp >> fx >> fy >> fz)) {
std::cerr << "Warning: Unexpected end of data when reading atom forces: " << line << std::endl;
} else {
s.atoms[i].force = Vec3d(fx,fy,fz);
}
}
}
else if (line.find("Cartesian components (GPa)") != std::string::npos) {
if (!std::getline(stream, line) || !std::getline(stream, line) || !std::getline(stream, line)) {
std::cerr << "Warning: Unexpected end of data when reading stress tensor" << std::endl;
}
for (size_t i = 0; i < 3; ++i) {
if (!std::getline(stream, line)) {
std::cerr << "Warning: Unexpected end of data when reading stress tensor: " << line << std::endl;
break;
}
std::istringstream iss(line);
std::string tmp;
if (!(iss >> tmp >> tmp >> s.stress(i,0) >> s.stress(i,1) >> s.stress(i,2))) {
std::cerr << "Warning: Unexpected end of data when reading atom forces: " << line << std::endl;
}
}
stress_tensor_bool = true;
}
// Potential Energy // MD
// Total energy corrected for finite basis set = -2690.182502 eV // GO, first step only
// enthalpy= indicates end of iteration for GO
else if (line.find("Final free energy (E-TS)") != std::string::npos) {
std::istringstream iss(line);
std::string tmp;
if (!(iss >> tmp >> tmp >> tmp >> tmp >> tmp >> s.energy)) {
std::cerr << "Warning: Unexpected end of data when reading total energy" << std::endl;
}
}
else if (line.find("enthalpy=") != std::string::npos) {
// GO: the last extracted free energy is correct one!
std::istringstream iss(line);
std::string step;
iss >> step >> step >> step >> step; // safe as found enthalpy=
std::ostringstream oss;
oss << std::boolalpha << constant_volume;
s.label = "CASTEP GO, const. volume: " + oss.str() + ", step: " + step;
complete_structure = true;
}
else if (line.find("Potential Energy:") != std::string::npos) {
// MD: end of iteration
std::istringstream iss(line);
std::string tmp;
if (!(iss >> tmp >> tmp >> tmp >> s.energy)) {
std::cerr << "Warning: Unexpected end of data when reading total energy" << std::endl;
}
if (!label.size())label = "CASTEP MD, const. volume: false, step: 0"; // the last option
s.label = label;
complete_structure = true;
}
if (complete_structure) {
if (!stress_tensor_bool) {
s.stress.set_zero();
}
else {
s.stress *= p_conv; // GPa -> eV/A^3
}
stdb.add(s);
complete_structure = false;
}
}
}
void CastepCastepReader::print_summary() const {
std::cout << stdb;
}
#include <tadah/mlip/atom.h> #include <tadah/mlip/atom.h>
#include <tadah/mlip/structure.h> #include <tadah/mlip/structure.h>
#include <tadah/mlip/structure.h> #include <tadah/mlip/structure_db.h>
#include <tadah/mlip/dataset_readers/vasp_outcar_reader.h> #include <tadah/mlip/dataset_readers/vasp_outcar_reader.h>
#include <fstream> #include <fstream>
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#include <tadah/mlip/structure_db.h> #include <tadah/mlip/structure_db.h>
#include "tadah/mlip/dataset_readers/castep_md_reader.h" #include "tadah/mlip/dataset_readers/castep_md_reader.h"
#include "tadah/mlip/dataset_readers/castep_geom_reader.h" #include "tadah/mlip/dataset_readers/castep_geom_reader.h"
#include "tadah/mlip/dataset_readers/castep_castep_reader.h"
#include <tadah/mlip/dataset_readers/vasp_outcar_reader.h> #include <tadah/mlip/dataset_readers/vasp_outcar_reader.h>
#include <tadah/mlip/dataset_readers/vasp_vasprun_reader.h> #include <tadah/mlip/dataset_readers/vasp_vasprun_reader.h>
#include <filesystem> #include <filesystem>
...@@ -27,11 +28,13 @@ TEST_CASE("Dataset Readers process datasets in directories", "[DatasetReaders]") ...@@ -27,11 +28,13 @@ TEST_CASE("Dataset Readers process datasets in directories", "[DatasetReaders]")
std::string valid_vasprun_dir = "./tests_data/valid_vaspruns"; std::string valid_vasprun_dir = "./tests_data/valid_vaspruns";
std::string valid_castep_md_dir = "./tests_data/valid_castep_md"; std::string valid_castep_md_dir = "./tests_data/valid_castep_md";
std::string valid_castep_geom_dir = "./tests_data/valid_castep_geom"; std::string valid_castep_geom_dir = "./tests_data/valid_castep_geom";
std::string valid_castep_castep_dir = "./tests_data/valid_castep_castep";
std::vector<std::string> valid_outcar_files = get_all_files(valid_outcar_dir); std::vector<std::string> valid_outcar_files = get_all_files(valid_outcar_dir);
std::vector<std::string> valid_vasprun_files = get_all_files(valid_vasprun_dir); std::vector<std::string> valid_vasprun_files = get_all_files(valid_vasprun_dir);
std::vector<std::string> valid_castep_md_files = get_all_files(valid_castep_md_dir); std::vector<std::string> valid_castep_md_files = get_all_files(valid_castep_md_dir);
std::vector<std::string> valid_castep_geom_files = get_all_files(valid_castep_geom_dir); std::vector<std::string> valid_castep_geom_files = get_all_files(valid_castep_geom_dir);
std::vector<std::string> valid_castep_castep_files = get_all_files(valid_castep_castep_dir);
SECTION("Valid OUTCAR datasets - Constructor 1") { SECTION("Valid OUTCAR datasets - Constructor 1") {
for (const auto& filename : valid_outcar_files) { for (const auto& filename : valid_outcar_files) {
...@@ -125,4 +128,27 @@ TEST_CASE("Dataset Readers process datasets in directories", "[DatasetReaders]") ...@@ -125,4 +128,27 @@ TEST_CASE("Dataset Readers process datasets in directories", "[DatasetReaders]")
// Additional checks to confirm data validity // Additional checks to confirm data validity
} }
} }
SECTION("Valid CASTEP .castep datasets - Constructor 1") {
for (const auto& filename : valid_castep_castep_files) {
StructureDB db;
REQUIRE_NOTHROW(CastepCastepReader(db));
CastepCastepReader reader(db);
REQUIRE_NOTHROW(reader.read_data(filename));
REQUIRE_NOTHROW(reader.parse_data());
REQUIRE_NOTHROW(reader.print_summary());
// Additional checks to confirm data validity
}
}
SECTION("Valid CASTEP .castep datasets - Constructor 2") {
for (const auto& filename : valid_castep_castep_files) {
StructureDB db;
REQUIRE_NOTHROW(CastepCastepReader(db, filename));
CastepCastepReader reader(db, filename);
REQUIRE_NOTHROW(reader.print_summary());
// Additional checks to confirm data validity
}
}
} }
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment