From 58d653ef734813c188b22d248922df1c5c51b630 Mon Sep 17 00:00:00 2001 From: Marcin Kirsz <mkirsz@ed.ac.uk> Date: Tue, 4 Mar 2025 20:11:56 +0000 Subject: [PATCH] WIP: Structure Readers --- CMakeLists.txt | 87 +++++- .../mlip/structure_readers/aflow_reader.h | 0 .../structure_readers/castep_cell_reader.h | 74 +++++ .../tadah/mlip/structure_readers/cif_reader.h | 51 +++ .../tadah/mlip/structure_readers/cod_reader.h | 0 .../materials_project_reader.h | 33 ++ .../mlip/structure_readers/nomad_reader.h | 0 .../mlip/structure_readers/structure_reader.h | 33 ++ .../structure_reader_selector.h | 29 ++ .../structure_readers/vasp_poscar_reader.h | 30 ++ src/castep_cell_reader.cpp | 293 ++++++++++++++++++ src/cif_reader.cpp | 59 ++++ src/materials_project_reader.cpp | 122 ++++++++ src/structure_reader_selector.cpp | 114 +++++++ src/vasp_poscar_reader.cpp | 204 ++++++++++++ 15 files changed, 1128 insertions(+), 1 deletion(-) create mode 100644 include/tadah/mlip/structure_readers/aflow_reader.h create mode 100644 include/tadah/mlip/structure_readers/castep_cell_reader.h create mode 100644 include/tadah/mlip/structure_readers/cif_reader.h create mode 100644 include/tadah/mlip/structure_readers/cod_reader.h create mode 100644 include/tadah/mlip/structure_readers/materials_project_reader.h create mode 100644 include/tadah/mlip/structure_readers/nomad_reader.h create mode 100644 include/tadah/mlip/structure_readers/structure_reader.h create mode 100644 include/tadah/mlip/structure_readers/structure_reader_selector.h create mode 100644 include/tadah/mlip/structure_readers/vasp_poscar_reader.h create mode 100644 src/castep_cell_reader.cpp create mode 100644 src/cif_reader.cpp create mode 100644 src/materials_project_reader.cpp create mode 100644 src/structure_reader_selector.cpp create mode 100644 src/vasp_poscar_reader.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 18f0dc6..6c42a3b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,13 +44,66 @@ if (BUILD_SHARED_LIBS) endif("${isSystemDir}" STREQUAL "-1") endif() +# Set default build type +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) +if (CMAKE_BUILD_TYPE STREQUAL "DEVELOP") +if(NOT DEFINED ENV{TADAH_PATH}) + message(FATAL_ERROR "TADAH_PATH system variable is undefined for the ${CMAKE_BUILD_TYPE} build") +endif() + function(find_directory_case_insensitive target_name search_dir result_var) + execute_process( + COMMAND find "${search_dir}" -maxdepth 1 -type d -iname "${target_name}" + OUTPUT_VARIABLE found_dir + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(found_dir) + set(${result_var} "${found_dir}" PARENT_SCOPE) + message("${TADAH} Develop build: Found directory: ${found_dir}") + else() + set(${result_var} "" PARENT_SCOPE) + message("${TADAH} Develop build: Directory not found.") + endif() + endfunction() + find_directory_case_insensitive("models" "$ENV{TADAH_PATH}" DEVELOP_MODELS_DIR) + set(FETCHCONTENT_SOURCE_DIR_TADAH.MODELS "${DEVELOP_MODELS_DIR}") +endif() + + +# Check is build supported +string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) +if (CMAKE_BUILD_TYPE STREQUAL "DEBUG" + OR + CMAKE_BUILD_TYPE STREQUAL "RELEASE" + OR + CMAKE_BUILD_TYPE STREQUAL "PROFILE" + OR + CMAKE_BUILD_TYPE STREQUAL "DEVELOP" +) + message(STATUS "${TADAH} Build type: ${CMAKE_BUILD_TYPE}.") +else() + message(FATAL_ERROR " + Selected build type is not supported: ${CMAKE_BUILD_TYPE}. + Supported build types: Release, Debug, Profile Develop" + ) +endif() + +# Update the documentation string of CMAKE_BUILD_TYPE for GUIs +set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING + "Choose the type of build, options are: None Debug Release Profile." + FORCE) + + include(FetchContent) if(DEFINED ENV{CI_COMMIT_REF_NAME}) set(GIT_BRANCH "$ENV{CI_COMMIT_REF_NAME}") else() execute_process( - COMMAND bash -c "git status | grep origin | sed -e 's/.*origin\\///' -e 's/[^a-zA-Z].*//'" + COMMAND bash -c "git status | sed -n '1s/^On branch //p'" WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GIT_BRANCH OUTPUT_STRIP_TRAILING_WHITESPACE) @@ -75,6 +128,38 @@ target_include_directories(tadah.mlip PUBLIC ${Tadah.MODELS_SOURCE_DIR}/include) target_include_directories(tadah.mlip PUBLIC ${Tadah.CORE_SOURCE_DIR}/include) target_include_directories(tadah.mlip PUBLIC ${Tadah.CORE_SOURCE_DIR}/external/toml11) +FetchContent_Declare( + gemmi + GIT_REPOSITORY https://github.com/project-gemmi/gemmi.git + GIT_TAG v0.7.0 +) +FetchContent_MakeAvailable(gemmi) +# target_link_libraries(tadah.mlip PRIVATE gemmi::headers) +target_link_libraries(tadah.mlip PRIVATE gemmi::gemmi_cpp) + +FetchContent_Declare( + json + URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz + DOWNLOAD_EXTRACT_TIMESTAMP true +) +FetchContent_MakeAvailable(json) +target_link_libraries(tadah.mlip PRIVATE nlohmann_json::nlohmann_json) + +find_package(CURL QUIET) # Use QUIET to avoid CMake’s default error + +if(NOT CURL_FOUND) + message(FATAL_ERROR + "\nCould not find libcurl. Please install the development files.\n" + "On Ubuntu/Debian-based systems, try:\n" + " sudo apt-get install libcurl4-openssl-dev\n" + "On Fedora/Red Hat, try:\n" + " sudo dnf install libcurl-devel\n" + "Or specify CURL_INCLUDE_DIR/CURL_LIBRARY manually.\n" + ) +endif() + +target_link_libraries(tadah.mlip PRIVATE CURL::libcurl) + if(TADAH_ENABLE_OPENMP) find_package(OpenMP REQUIRED) target_link_libraries(tadah.mlip PUBLIC OpenMP::OpenMP_CXX) diff --git a/include/tadah/mlip/structure_readers/aflow_reader.h b/include/tadah/mlip/structure_readers/aflow_reader.h new file mode 100644 index 0000000..e69de29 diff --git a/include/tadah/mlip/structure_readers/castep_cell_reader.h b/include/tadah/mlip/structure_readers/castep_cell_reader.h new file mode 100644 index 0000000..d494f30 --- /dev/null +++ b/include/tadah/mlip/structure_readers/castep_cell_reader.h @@ -0,0 +1,74 @@ +#ifndef CASTEP_CELL_READER_H +#define CASTEP_CELL_READER_H + +#include <string> +#include <tadah/mlip/structure.h> +#include <tadah/mlip/structure_readers/structure_reader.h> + +/** + * @brief Reads crystallographic and atomic data from a CASTEP .cell file. + * + * The .cell file can contain: + * - %BLOCK LATTICE_CART or %BLOCK LATTICE_ABC + * - %BLOCK POSITIONS_FRAC or %BLOCK POSITIONS_ABS + * + * If LATTICE_ABC is provided, the code converts (a,b,c,α,β,γ) into + * a 3×3 Cartesian representation with: + * a along x, + * b in xy plane, + * c forming a right-handed set with a and b. + * + * If positions are fractional, we multiply by the 3×3 cell matrix + * to get absolute positions in Angstrom. If positions are absolute, + * we store them directly. + * + * This parser also extracts atomic species from the first token + * in POSITIONS_* blocks. + */ +class CastepCellReader : public StructureReader { +public: + /** + * @brief Default constructor. + */ + CastepCellReader() = default; + + /** + * @brief Reads and parses the specified .cell file. + * @param path Path to the CASTEP cell file. + */ + void read(const std::string &path) override; + + /** + * @brief Returns the parsed Structure with absolute coordinates in Ångström. + */ + Structure getStructure() const override; + +private: + Structure m_structure; + + /** + * @brief Parses the complete contents of a .cell file from a single string. + */ + void parseCellContents(const std::string &contents); + + /** + * @brief Parses LATTICE_ABC lines into the 3×3 matrix in m_structure.cell. + * @param lines a vector of strings containing a b c on one line, alpha beta + * gamma on next line + */ + void parseLatticeABC(const std::vector<std::string> &lines); + + /** + * @brief Converts atomic fractional coordinates (fx, fy, fz) to absolute in Å + * using the current m_structure.cell matrix. + */ + void applyFractional(double fx, double fy, double fz, double &cx, double &cy, + double &cz); + + /** + * @brief Helper for mapping string species (e.g., "Si", "O") to Element enum. + */ + Element parseSpecies(const std::string &sym); +}; + +#endif // CASTEP_CELL_READER_H diff --git a/include/tadah/mlip/structure_readers/cif_reader.h b/include/tadah/mlip/structure_readers/cif_reader.h new file mode 100644 index 0000000..6488cf3 --- /dev/null +++ b/include/tadah/mlip/structure_readers/cif_reader.h @@ -0,0 +1,51 @@ +#ifndef CIF_READER_H +#define CIF_READER_H + +#include <tadah/mlip/structure_readers/structure_reader.h> +#include <tadah/mlip/structure.h> +#include <string> + +/** + * @brief Reads Structure data from a CIF file using older Gemmi APIs. + * + * - Gets cell parameters from block.find_value("_cell_length_a"), etc. + * - Iterates block.items to find loops (where item.kind == gemmi::cif::Item::Loop). + * - Searches loop.tags for _atom_site_fract_x or _atom_site_cartn_x. + * - Uses loop.get_s(row, col) to retrieve the data cells, converting to float/double. + */ +class CifReader : public StructureReader { +public: + CifReader() = default; + + void read(const std::string &path) override; + Structure getStructure() const override; + +private: + Structure m_structure; + + /** + * @brief Parses the entire CIF text, populating m_structure with cell & atoms. + */ + void parseCifContents(const std::string &contents); + + /** + * @brief Creates a 3×3 matrix from (a,b,c, alpha,beta,gamma). + * a along x, b in xy-plane, c forms a right-handed set. + */ + void buildCellFromABC(double aVal, double bVal, double cVal, + double alphaDeg, double betaDeg, double gammaDeg); + + /** + * @brief Convert (fx,fy,fz) fractional → Cartesian coords using m_structure.cell. + */ + void fracToCart(double fx, double fy, double fz, + double &cx, double &cy, double &cz) const; + + /** + * @brief Convert degrees to radians. + */ + static double deg2rad(double deg); +}; + +#endif // CIF_READER_H + diff --git a/include/tadah/mlip/structure_readers/cod_reader.h b/include/tadah/mlip/structure_readers/cod_reader.h new file mode 100644 index 0000000..e69de29 diff --git a/include/tadah/mlip/structure_readers/materials_project_reader.h b/include/tadah/mlip/structure_readers/materials_project_reader.h new file mode 100644 index 0000000..6d08cde --- /dev/null +++ b/include/tadah/mlip/structure_readers/materials_project_reader.h @@ -0,0 +1,33 @@ +#ifndef MATERIALS_PROJECT_READER_H +#define MATERIALS_PROJECT_READER_H + +#include <string> +#include <tadah/mlip/structure_readers/structure_reader.h> + +/** + * @brief Reads a Structure from the Materials Project via REST API (v2). + * + * Usage: + * @code + * MaterialsProjectReader reader("MY_VALID_MP_KEY"); + * reader.read("mp-149"); + * Structure s = reader.getStructure(); + * @endcode + */ +class MaterialsProjectReader : public StructureReader { +public: + explicit MaterialsProjectReader(const std::string &apiKey); + + void read(const std::string &mpID) override; + Structure getStructure() const override; + +private: + std::string m_apiKey; + Structure m_structure; + + void fetchAndParseMP(const std::string &mpID); + std::string httpGet(const std::string &url); + void parseMpJson(const std::string &jsonContent); +}; + +#endif // MATERIALS_PROJECT_READER_H diff --git a/include/tadah/mlip/structure_readers/nomad_reader.h b/include/tadah/mlip/structure_readers/nomad_reader.h new file mode 100644 index 0000000..e69de29 diff --git a/include/tadah/mlip/structure_readers/structure_reader.h b/include/tadah/mlip/structure_readers/structure_reader.h new file mode 100644 index 0000000..e7bf58c --- /dev/null +++ b/include/tadah/mlip/structure_readers/structure_reader.h @@ -0,0 +1,33 @@ +#ifndef STRUCTURE_READER_H +#define STRUCTURE_READER_H + +#include <string> +#include <tadah/mlip/structure.h> + +/** + * @brief Abstract base class for reading a Structure from a file or an online + * source. + * + * Provides a unified interface for various file formats or REST API sources. + */ +class StructureReader { +public: + /// Virtual destructor. + virtual ~StructureReader() = default; + + /** + * @brief Reads structural information from the given path or identifier. + * + * @param path A file path (e.g., "myfile.cif") or an online ID (e.g., + * "mp-149"). + */ + virtual void read(const std::string &path) = 0; + + /** + * @brief Retrieves the last read Structure. + * All coordinates should be in absolute (Cartesian) Angstrom units. + */ + virtual Structure getStructure() const = 0; +}; + +#endif // STRUCTURE_READER_H diff --git a/include/tadah/mlip/structure_readers/structure_reader_selector.h b/include/tadah/mlip/structure_readers/structure_reader_selector.h new file mode 100644 index 0000000..e32b016 --- /dev/null +++ b/include/tadah/mlip/structure_readers/structure_reader_selector.h @@ -0,0 +1,29 @@ +#ifndef STRUCTURE_READER_SELECTOR_H +#define STRUCTURE_READER_SELECTOR_H + +#include <memory> +#include <string> +#include <tadah/mlip/structure_readers/structure_reader.h> + +/** + * @brief A factory that provides an appropriate StructureReader + * based on file extension or recognized online ID prefixes. + */ +class StructureReaderSelector { +public: + /** + * @brief Creates a reader by analyzing the input path or ID. + * + * @param pathOrId File path or an online ID (e.g., "mp-149") + * @return A suitable StructureReader capable of parsing the structure. + * @throws std::runtime_error if no suitable reader is found. + */ + static std::unique_ptr<StructureReader> + getReader(const std::string &pathOrId); + +private: + static std::string guessFormat(const std::string &pathOrId); + static std::string guessFormatFromContent(const std::string &filePath); +}; + +#endif // STRUCTURE_READER_SELECTOR_H diff --git a/include/tadah/mlip/structure_readers/vasp_poscar_reader.h b/include/tadah/mlip/structure_readers/vasp_poscar_reader.h new file mode 100644 index 0000000..2abca5d --- /dev/null +++ b/include/tadah/mlip/structure_readers/vasp_poscar_reader.h @@ -0,0 +1,30 @@ +#ifndef VASP_POSCAR_READER_H +#define VASP_POSCAR_READER_H + +#include <tadah/mlip/structure_readers/structure_reader.h> + +/** + * @brief Reads a VASP POSCAR/CONTCAR file. + * + * A typical format: + * 1) Comment line + * 2) Scaling factor + * 3-5) Lattice vectors + * 6) Element symbols or counts + * 7) Possibly "Selective dynamics" line + * 8) "Direct" or "Cartesian" + * 9+) Atomic coordinates + */ +class VaspPoscarReader : public StructureReader { +public: + VaspPoscarReader() = default; + + void read(const std::string &path) override; + Structure getStructure() const override; + +private: + Structure m_structure; + void parsePoscar(const std::vector<std::string> &lines); +}; + +#endif // VASP_POSCAR_READER_H diff --git a/src/castep_cell_reader.cpp b/src/castep_cell_reader.cpp new file mode 100644 index 0000000..fb09541 --- /dev/null +++ b/src/castep_cell_reader.cpp @@ -0,0 +1,293 @@ +#include <tadah/mlip/structure_readers/castep_cell_reader.h> + +#include <algorithm> +#include <cctype> +#include <cmath> +#include <fstream> +#include <sstream> +#include <stdexcept> +#include <vector> + +//-------------------------------------------------------------------- +// Helper to split a line by whitespace +//-------------------------------------------------------------------- +static std::vector<std::string> split_line(const std::string &line) { + std::istringstream iss(line); + std::vector<std::string> tokens; + std::string s; + while (iss >> s) { + tokens.push_back(s); + } + return tokens; +} + +// Convert degrees to radians +static double deg2rad(double deg) { + static const double PI = 3.141592653589793; + return deg * (PI / 180.0); +} + +//-------------------------------------------------------------------- +// Implementation of public methods +//-------------------------------------------------------------------- +void CastepCellReader::read(const std::string &path) { + std::ifstream ifs(path); + if (!ifs.is_open()) { + throw std::runtime_error("CastepCellReader: Cannot open file " + path); + } + + std::ostringstream buffer; + buffer << ifs.rdbuf(); + parseCellContents(buffer.str()); + + m_structure.label = "CASTEP cell file: " + path; +} + +Structure CastepCellReader::getStructure() const { + return m_structure; +} + +//-------------------------------------------------------------------- +// parseCellContents +//-------------------------------------------------------------------- +void CastepCellReader::parseCellContents(const std::string &contents) { + std::istringstream iss(contents); + std::string line; + + // State flags for each block + bool inLatticeCart = false; + bool inLatticeABC = false; + bool inPosFracBlock = false; + bool inPosAbsBlock = false; + + int lcCount = 0; // track lines read for LATTICE_CART + std::vector<std::string> abcLines; // store lines for LATTICE_ABC + + // Initialize cell to zero (in case we never find any block) + for (int i=0; i<3; ++i) + for (int j=0; j<3; ++j) + m_structure.cell(i,j) = 0.0; + + // Read file line by line + while (std::getline(iss, line)) { + // Make a lowercase copy for easier detection + std::string lcase = line; + std::transform(lcase.begin(), lcase.end(), lcase.begin(), ::tolower); + + // Detect block starts/ends + if (lcase.find("%block lattice_cart") != std::string::npos) { + inLatticeCart = true; + inLatticeABC = false; + lcCount = 0; + continue; + } + if (lcase.find("%endblock lattice_cart") != std::string::npos) { + inLatticeCart = false; + continue; + } + if (lcase.find("%block lattice_abc") != std::string::npos) { + inLatticeABC = true; + inLatticeCart = false; + lcCount = 0; + abcLines.clear(); + continue; + } + if (lcase.find("%endblock lattice_abc") != std::string::npos) { + inLatticeABC = false; + // parse lines from abcLines (if present) + if (abcLines.size() >= 2) { + parseLatticeABC(abcLines); + } + continue; + } + if (lcase.find("%block positions_frac") != std::string::npos) { + inPosFracBlock = true; + inPosAbsBlock = false; + continue; + } + if (lcase.find("%endblock positions_frac") != std::string::npos) { + inPosFracBlock = false; + continue; + } + if (lcase.find("%block positions_abs") != std::string::npos) { + inPosAbsBlock = true; + inPosFracBlock = false; + continue; + } + if (lcase.find("%endblock positions_abs") != std::string::npos) { + inPosAbsBlock = false; + continue; + } + + // Now parse lines if in any recognized block + if (inLatticeCart) { + // read up to 3 lines for a,b,c + if (lcCount < 3) { + auto toks = split_line(line); + if (toks.size() < 3) { + throw std::runtime_error("CastepCellReader: LATTICE_CART line missing coords."); + } + double x = std::stod(toks[0]); + double y = std::stod(toks[1]); + double z = std::stod(toks[2]); + + m_structure.cell(lcCount,0) = x; + m_structure.cell(lcCount,1) = y; + m_structure.cell(lcCount,2) = z; + lcCount++; + } + } + else if (inLatticeABC) { + // store lines for later parsing + if (!line.empty()) { + abcLines.push_back(line); + } + } + else if (inPosFracBlock) { + // e.g. "Si 0.0 0.0 0.0" + auto toks = split_line(line); + if (toks.size() < 4) { + // possibly a comment or empty line, skip + continue; + } + + // parse species + Element e = parseSpecies(toks[0]); + + // fractional coords + double fx = std::stod(toks[1]); + double fy = std::stod(toks[2]); + double fz = std::stod(toks[3]); + + double cx, cy, cz; + applyFractional(fx, fy, fz, cx, cy, cz); + + Atom a; + // Copy base element data into 'a' + static_cast<Element&>(a) = e; + // Now set the position + a.position = {cx, cy, cz}; + + // Force is left default (0,0,0) + m_structure.atoms.push_back(a); + } + else if (inPosAbsBlock) { + // e.g. "O 1.2 3.4 5.6" + auto toks = split_line(line); + if (toks.size() < 4) { + continue; + } + + Element e = parseSpecies(toks[0]); + + double x = std::stod(toks[1]); + double y = std::stod(toks[2]); + double z = std::stod(toks[3]); + + Atom a; + static_cast<Element&>(a) = e; + a.position = {x, y, z}; + + m_structure.atoms.push_back(a); + } + } +} + +//-------------------------------------------------------------------- +// parseLatticeABC +//-------------------------------------------------------------------- +void CastepCellReader::parseLatticeABC(const std::vector<std::string> &lines) { + // We expect at least two lines: + // line 1: "a b c" + // line 2: "alpha beta gamma" + // Additional lines might exist (units, comments). + // We'll search for the first line with >=3 tokens for (a,b,c), + // and the next with >=3 tokens for (alpha,beta,gamma). + + double aVal=0.0, bVal=0.0, cVal=0.0; + double alphaDeg=90.0, betaDeg=90.0, gammaDeg=90.0; + bool haveABC = false; + bool haveAngles = false; + + for (auto &ln : lines) { + auto toks = split_line(ln); + if (toks.size() >= 3 && !haveABC) { + aVal = std::stod(toks[0]); + bVal = std::stod(toks[1]); + cVal = std::stod(toks[2]); + haveABC = true; + continue; + } + else if (toks.size() >= 3 && haveABC && !haveAngles) { + alphaDeg = std::stod(toks[0]); + betaDeg = std::stod(toks[1]); + gammaDeg = std::stod(toks[2]); + haveAngles = true; + break; + } + } + if (!haveABC || !haveAngles) { + throw std::runtime_error("CastepCellReader: LATTICE_ABC block incomplete (need a,b,c and alpha,beta,gamma)."); + } + + double alpha = deg2rad(alphaDeg); + double beta = deg2rad(betaDeg); + double gamma = deg2rad(gammaDeg); + + // Standard orientation: + // a = (aVal, 0, 0) + // b.x = bVal * cos(gamma) + // b.y = bVal * sin(gamma) + // b.z = 0 + // c.x = cVal * cos(beta) + // c.y = cVal * [cos(alpha) - cos(beta)*cos(gamma)] / sin(gamma) + // c.z = sqrt(cVal^2 - cx^2 - cy^2) + + // a + m_structure.cell(0,0) = aVal; + m_structure.cell(0,1) = 0.0; + m_structure.cell(0,2) = 0.0; + + // b + double bx = bVal * std::cos(gamma); + double by = bVal * std::sin(gamma); + m_structure.cell(1,0) = bx; + m_structure.cell(1,1) = by; + m_structure.cell(1,2) = 0.0; + + // c + double cx = cVal * std::cos(beta); + double cy = cVal * (std::cos(alpha) - std::cos(beta)*std::cos(gamma)) / std::sin(gamma); + double czsq = cVal*cVal - cx*cx - cy*cy; + if (czsq < 0.0) { + // could be numerical rounding or truly invalid + if (czsq > -1e-10) czsq = 0.0; // clamp small negative to zero + else { + throw std::runtime_error("CastepCellReader: invalid angles produce negative c.z^2."); + } + } + double cz = std::sqrt(czsq); + + m_structure.cell(2,0) = cx; + m_structure.cell(2,1) = cy; + m_structure.cell(2,2) = cz; +} + +//-------------------------------------------------------------------- +// applyFractional +//-------------------------------------------------------------------- +void CastepCellReader::applyFractional(double fx, double fy, double fz, + double &cx, double &cy, double &cz) { + // r_cart = fx*a + fy*b + fz*c + // a, b, c are rows of the 3×3 matrix in m_structure.cell + cx = fx*m_structure.cell(0,0) + fy*m_structure.cell(1,0) + fz*m_structure.cell(2,0); + cy = fx*m_structure.cell(0,1) + fy*m_structure.cell(1,1) + fz*m_structure.cell(2,1); + cz = fx*m_structure.cell(0,2) + fy*m_structure.cell(1,2) + fz*m_structure.cell(2,2); +} + +//-------------------------------------------------------------------- +// parseSpecies +//-------------------------------------------------------------------- +Element CastepCellReader::parseSpecies(const std::string &sym) { + return PeriodicTable().find_by_symbol(sym); +} diff --git a/src/cif_reader.cpp b/src/cif_reader.cpp new file mode 100644 index 0000000..1cba6f2 --- /dev/null +++ b/src/cif_reader.cpp @@ -0,0 +1,59 @@ +#include <tadah/core/periodic_table.h> +#include <tadah/mlip/structure_readers/cif_reader.h> + +#include <gemmi/cif.hpp> +#include <gemmi/smcif.hpp> + +#include <cmath> +#include <fstream> +#include <sstream> +#include <stdexcept> + +void CifReader::read(const std::string &path) { + std::ifstream ifs(path); + if (!ifs.is_open()) { + throw std::runtime_error("CifReader::read: Cannot open " + path); + } + + std::ostringstream buffer; + buffer << ifs.rdbuf(); + parseCifContents(buffer.str()); + + // Label the source + m_structure.label = "CIF file: " + path; +} + +Structure CifReader::getStructure() const { return m_structure; } + +void CifReader::parseCifContents(const std::string &fileContents) { + using namespace gemmi::cif; + + // Parses the raw CIF content into a gemmi Document + Document doc = read_string(fileContents); + if (doc.blocks.empty()) { + throw std::runtime_error( + "CifReader::parseCifContents: No data blocks found."); + } + + // Assumes there is only one data block in the CIF + Block &block = doc.sole_block(); + gemmi::SmallStructure struc = gemmi::make_small_structure_from_block(block); + struc.check_spacegroup(); + + auto sites = struc.get_all_unit_cell_sites(); + + m_structure.atoms.reserve(sites.size()); + for (const auto &site : sites) { + auto p = struc.cell.orthogonalize(site.fract); + auto label = site.element.name(); + Element e = PeriodicTable().find_by_symbol(label); + Atom a(e, p.x, p.y, p.z, 0, 0, 0); + m_structure.atoms.push_back(a); + } + auto mat = struc.cell.orth.mat; + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 3; ++j) { + m_structure.cell(i, j) = mat.a[i][j]; + } + } +} diff --git a/src/materials_project_reader.cpp b/src/materials_project_reader.cpp new file mode 100644 index 0000000..dc688bd --- /dev/null +++ b/src/materials_project_reader.cpp @@ -0,0 +1,122 @@ +#include <sstream> +#include <stdexcept> +#include <tadah/mlip/structure_readers/materials_project_reader.h> + +#include <curl/curl.h> // libcurl +#include <nlohmann/json.hpp> // nlohmann JSON + +static Element parse_element_mp(const std::string &elemName); + +MaterialsProjectReader::MaterialsProjectReader(const std::string &apiKey) + : m_apiKey(apiKey) {} + +void MaterialsProjectReader::read(const std::string &mpID) { + fetchAndParseMP(mpID); + m_structure.label = "MaterialsProject ID: " + mpID; +} + +Structure MaterialsProjectReader::getStructure() const { return m_structure; } + +void MaterialsProjectReader::fetchAndParseMP(const std::string &mpID) { + std::string url = "https://materialsproject.org/rest/v2/materials/"; + url += mpID + "/structure?API_KEY=" + m_apiKey; + + std::string response = httpGet(url); + parseMpJson(response); +} + +std::string MaterialsProjectReader::httpGet(const std::string &url) { + CURL *curl = curl_easy_init(); + if (!curl) { + throw std::runtime_error( + "MaterialsProjectReader::httpGet: Failed to init libcurl"); + } + + auto writeCallback = [](char *ptr, size_t size, size_t nmemb, + void *userdata) -> size_t { + std::string *resp = static_cast<std::string *>(userdata); + size_t totalSize = size * nmemb; + resp->append(ptr, totalSize); + return totalSize; + }; + + std::string response; + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + curl_easy_cleanup(curl); + throw std::runtime_error( + std::string("httpGet: curl_easy_perform() failed: ") + + curl_easy_strerror(res)); + } + curl_easy_cleanup(curl); + return response; +} + +void MaterialsProjectReader::parseMpJson(const std::string &jsonContent) { + using json = nlohmann::json; + json root = json::parse(jsonContent); + + if (!root.contains("response") || !root["response"].is_array() || + root["response"].empty()) { + throw std::runtime_error("parseMpJson: No response array in JSON."); + } + + auto data = root["response"][0]; + if (!data.contains("lattice") || !data["lattice"].contains("matrix")) { + throw std::runtime_error("parseMpJson: Missing lattice.matrix"); + } + + auto mat = data["lattice"]["matrix"]; + if (!mat.is_array() || mat.size() != 3) { + throw std::runtime_error("parseMpJson: matrix is not 3x3"); + } + + for (int i = 0; i < 3; i++) { + if (!mat[i].is_array() || mat[i].size() != 3) { + throw std::runtime_error("parseMpJson: matrix row is not size 3"); + } + m_structure.cell(i, 0) = mat[i][0].get<double>(); + m_structure.cell(i, 1) = mat[i][1].get<double>(); + m_structure.cell(i, 2) = mat[i][2].get<double>(); + } + + if (!data.contains("sites") || !data["sites"].is_array()) { + throw std::runtime_error("parseMpJson: No sites array"); + } + auto sites = data["sites"]; + for (auto &site : sites) { + if (!site.contains("abc") || !site["abc"].is_array() || + site["abc"].size() != 3) { + throw std::runtime_error("parseMpJson: site missing abc array"); + } + double fx = site["abc"][0].get<double>(); + double fy = site["abc"][1].get<double>(); + double fz = site["abc"][2].get<double>(); + + // fractional -> cart + double x = fx * m_structure.cell(0, 0) + fy * m_structure.cell(1, 0) + + fz * m_structure.cell(2, 0); + double y = fx * m_structure.cell(0, 1) + fy * m_structure.cell(1, 1) + + fz * m_structure.cell(2, 1); + double z = fx * m_structure.cell(0, 2) + fy * m_structure.cell(1, 2) + + fz * m_structure.cell(2, 2); + + Atom a; + std::string lbl = site["label"].get<std::string>(); + Element e = parse_element_mp(lbl); + static_cast<Element &>(a) = e; + a.position[0] = x; + a.position[1] = y; + a.position[2] = z; + m_structure.atoms.push_back(a); + } +} + +static Element parse_element_mp(const std::string &elemName) { + return PeriodicTable().find_by_symbol(elemName); +} diff --git a/src/structure_reader_selector.cpp b/src/structure_reader_selector.cpp new file mode 100644 index 0000000..684d2ce --- /dev/null +++ b/src/structure_reader_selector.cpp @@ -0,0 +1,114 @@ +#include <tadah/mlip/structure_readers/structure_reader_selector.h> +#include <tadah/mlip/structure_readers/cif_reader.h> +#include <tadah/mlip/structure_readers/vasp_poscar_reader.h> +#include <tadah/mlip/structure_readers/castep_cell_reader.h> +/*#include <tadah/mlip/structure_readers/extended_xyz_reader.h>*/ +/*#include <tadah/mlip/structure_readers/xsf_reader.h>*/ +#include <tadah/mlip/structure_readers/materials_project_reader.h> +/*#include <tadah/mlip/structure_readers/cod_reader.h>*/ +/*#include <tadah/mlip/structure_readers/aflow_reader.h>*/ +/*#include <tadah/mlip/structure_readers/nomad_reader.h>*/ + +#include <fstream> +#include <algorithm> +#include <cctype> +#include <stdexcept> + +std::unique_ptr<StructureReader> StructureReaderSelector::getReader(const std::string& pathOrId) +{ + std::cout << "Reading structure from: " << pathOrId << std::endl; + std::string format = guessFormat(pathOrId); + std::cout << "Format: " << format << std::endl; + + if (format == "MP") { + return std::make_unique<MaterialsProjectReader>("MY_MP_API_KEY"); // Provide your real key + } + else if (format == "CIF") { + std::cout << "CIF Reader Selected" << std::endl; + return std::make_unique<CifReader>(); + } + else if (format == "POSCAR") { + return std::make_unique<VaspPoscarReader>(); + } + else if (format == "CELL") { + return std::make_unique<CastepCellReader>(); + } + /*else if (format == "XYZ") {*/ + /* return std::make_unique<ExtendedXyzReader>();*/ + /*}*/ + /*else if (format == "XSF") {*/ + /* return std::make_unique<XsfReader>();*/ + /*}*/ + /*else if (format == "COD") {*/ + /* return std::make_unique<CODReader>();*/ + /*}*/ + /*else if (format == "AFLOW") {*/ + /* return std::make_unique<AflowReader>();*/ + /*}*/ + /*else if (format == "NOMAD") {*/ + /* return std::make_unique<NomadReader>();*/ + /*}*/ + else { + throw std::runtime_error("StructureReaderSelector::getReader - Unknown format for " + pathOrId); + } +} + +std::string StructureReaderSelector::guessFormat(const std::string& pathOrId) +{ + // Check known online ID patterns: + if (pathOrId.rfind("mp-", 0) == 0) { + return "MP"; + } + if (pathOrId.rfind("cod-", 0) == 0) { + return "COD"; + } + if (pathOrId.rfind("aflow-", 0) == 0) { + return "AFLOW"; + } + if (pathOrId.rfind("nomad-", 0) == 0) { + return "NOMAD"; + } + + // Check file extension: + auto dotPos = pathOrId.find_last_of('.'); + if (dotPos != std::string::npos) { + std::string ext = pathOrId.substr(dotPos + 1); + std::transform(ext.begin(), ext.end(), ext.begin(), + [](unsigned char c){ return std::tolower(c); }); + + if (ext == "cif") return "CIF"; + if (ext == "cell") return "CELL"; + if (ext == "xsf") return "XSF"; + if (ext == "xyz") return "XYZ"; + // Some VASP files may not have an extension + } + + return guessFormatFromContent(pathOrId); +} + +std::string StructureReaderSelector::guessFormatFromContent(const std::string& filePath) +{ + std::ifstream ifs(filePath); + if (!ifs.is_open()) { + // If can't open, guess POSCAR (commonly no extension). + return "POSCAR"; + } + + std::string line; + if (std::getline(ifs, line)) { + // Some naive checks + if (line.find("lattice_vector") != std::string::npos) { + return "XSF"; + } + if (line.find("CELL_PARAMETERS") != std::string::npos) { + return "CIF"; + } + if (line.find("%BLOCK LATTICE_") != std::string::npos) { + return "CELL"; + } + } + + // fallback + return "POSCAR"; +} + diff --git a/src/vasp_poscar_reader.cpp b/src/vasp_poscar_reader.cpp new file mode 100644 index 0000000..086cc51 --- /dev/null +++ b/src/vasp_poscar_reader.cpp @@ -0,0 +1,204 @@ +#include <algorithm> +#include <cctype> +#include <fstream> +#include <sstream> +#include <stdexcept> +#include <tadah/mlip/structure_readers/vasp_poscar_reader.h> +#include <vector> + +// A small helper to parse each line into tokens +static std::vector<std::string> split(const std::string &str); + +void VaspPoscarReader::read(const std::string &path) { + std::ifstream ifs(path); + if (!ifs.is_open()) { + throw std::runtime_error("VaspPoscarReader::read: Cannot open " + path); + } + + std::vector<std::string> lines; + { + std::string line; + while (std::getline(ifs, line)) { + lines.push_back(line); + } + } + if (lines.size() < 8) { + throw std::runtime_error( + "VaspPoscarReader::read: Not enough lines for a POSCAR file."); + } + + parsePoscar(lines); + m_structure.label = "POSCAR file: " + path; +} + +Structure VaspPoscarReader::getStructure() const { return m_structure; } + +void VaspPoscarReader::parsePoscar(const std::vector<std::string> &lines) { + // 1) Ignore comment line (lines[0]) + // 2) Scaling factor + double scaleFactor = 1.0; + { + std::stringstream ss(lines[1]); + ss >> scaleFactor; + if (!ss.good() && !ss.eof()) { + // if parse fails, use default = 1.0 + scaleFactor = 1.0; + } + } + + // 3-5) Lattice vectors + for (int i = 0; i < 3; ++i) { + std::stringstream ss(lines[2 + i]); + double x, y, z; + ss >> x >> y >> z; + m_structure.cell(i, 0) = x * scaleFactor; + m_structure.cell(i, 1) = y * scaleFactor; + m_structure.cell(i, 2) = z * scaleFactor; + } + + // 6) Next line could be element symbols or counts + // For simplicity, assume it's element symbols + bool hasElementSymbols = true; + std::vector<std::string> fields = split(lines[5]); + // If they're all digits -> that's the counts line, no symbols + { + bool allDigits = true; + for (auto &f : fields) { + if (f.empty() || !std::all_of(f.begin(), f.end(), ::isdigit)) { + allDigits = false; + break; + } + } + if (allDigits) + hasElementSymbols = false; + } + + // If hasElementSymbols is true, the line after it is counts. + // If not, line[5] is counts + size_t countsLineIndex = hasElementSymbols ? 6 : 5; + if (countsLineIndex >= lines.size()) { + throw std::runtime_error( + "VaspPoscarReader: incomplete POSCAR (missing counts line)."); + } + + std::vector<int> speciesCounts; + { + std::vector<std::string> cfields = split(lines[countsLineIndex]); + for (auto &cf : cfields) { + speciesCounts.push_back(std::stoi(cf)); + } + } + size_t nextLineIndex = countsLineIndex + 1; + if (nextLineIndex >= lines.size()) { + throw std::runtime_error( + "VaspPoscarReader: incomplete POSCAR (no coords section)."); + } + + // Check if there's a "Selective Dynamics" line + bool selectiveDynamics = false; + bool directCoords = true; // or cart + { + std::string s = lines[nextLineIndex]; + std::transform(s.begin(), s.end(), s.begin(), ::tolower); + if (s.find("selective") != std::string::npos) { + selectiveDynamics = true; + nextLineIndex++; + } + } + // Now check direct or cart + { + if (nextLineIndex >= lines.size()) { + throw std::runtime_error("VaspPoscarReader: no coordinate type line."); + } + std::string s = lines[nextLineIndex]; + std::transform(s.begin(), s.end(), s.begin(), ::tolower); + if (s.find("cart") != std::string::npos) { + directCoords = false; + } + // else direct if "direct" or "frac" + nextLineIndex++; + } + + // Now we read atomic coordinates + // sum(speciesCounts) is total # of atoms + int totalAtoms = 0; + for (auto c : speciesCounts) + totalAtoms += c; + + if (nextLineIndex + totalAtoms > lines.size()) { + throw std::runtime_error( + "VaspPoscarReader: Not enough lines for all atoms."); + } + + // If hasElementSymbols, we have e.g. line[5] = "Si O", line[6] = "2 4" + // We might store them in an array to track species assignment + std::vector<std::string> elemSymbols; + if (hasElementSymbols) { + elemSymbols = split(lines[5]); + } else { + // fallback: unknown symbol, use X + int nEl = (int)speciesCounts.size(); + elemSymbols.resize(nEl, "X"); + } + + // Expand element symbols for each atomic site + std::vector<std::string> atomSymbols; + { + size_t idx = 0; + for (size_t iEl = 0; iEl < speciesCounts.size(); iEl++) { + for (int k = 0; k < speciesCounts[iEl]; k++) { + atomSymbols.push_back(elemSymbols[iEl]); + idx++; + } + } + } + + // parse coords + for (int i = 0; i < totalAtoms; i++) { + std::vector<std::string> cfields = split(lines[nextLineIndex + i]); + if (cfields.size() < 3) { + throw std::runtime_error( + "VaspPoscarReader: not enough coords for atom line " + + std::to_string(nextLineIndex + i)); + } + double x = std::stod(cfields[0]); + double y = std::stod(cfields[1]); + double z = std::stod(cfields[2]); + + Atom a; + // parse element from atomSymbols[i] if you maintain a map + // For brevity, store minimal approach: + Element e = PeriodicTable().find_by_symbol(atomSymbols[i]); + static_cast<Element &>(a) = e; + + if (directCoords) { + // Convert fractional -> cart + double cx = x * m_structure.cell(0, 0) + y * m_structure.cell(1, 0) + + z * m_structure.cell(2, 0); + double cy = x * m_structure.cell(0, 1) + y * m_structure.cell(1, 1) + + z * m_structure.cell(2, 1); + double cz = x * m_structure.cell(0, 2) + y * m_structure.cell(1, 2) + + z * m_structure.cell(2, 2); + a.position[0] = cx; + a.position[1] = cy; + a.position[2] = cz; + } else { + // Already cart + a.position[0] = x; + a.position[1] = y; + a.position[2] = z; + } + + m_structure.atoms.push_back(a); + } +} + +static std::vector<std::string> split(const std::string &str) { + std::vector<std::string> tokens; + std::istringstream iss(str); + std::string s; + while (iss >> s) { + tokens.push_back(s); + } + return tokens; +} -- GitLab