diff --git a/include/tadah/mlip/structure_readers/aflow_reader.h b/include/tadah/mlip/structure_readers/aflow_reader.h index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f1e68d29382e07fba7c9873e9cd7554dabc11a63 100644 --- a/include/tadah/mlip/structure_readers/aflow_reader.h +++ b/include/tadah/mlip/structure_readers/aflow_reader.h @@ -0,0 +1,98 @@ +#ifndef AFLOW_READER_H +#define AFLOW_READER_H + +#include <string> +#include <tadah/mlip/structure_readers/structure_reader.h> +#include <tadah/mlip/structure.h> + +/** + * @class AflowReader + * @brief Implements the StructureReader interface to fetch structural data from the AFLOW REST API + * using only an AUID (AFLOW Unique Identifier). + * + * The AUID is a string like: "aflow:e9c6d914c4b8d9ca". + * + * Example usage: + * AflowReader reader; + * reader.read("aflow:e9c6d914c4b8d9ca"); + * Structure st = reader.getStructure(); + * // Now st.cell and st.atoms are populated if geometry info was found. + * + * The entry is located with an AFLUX query and then downloaded from its "aurl": + * "https://aflow.org/API/aflux/?auid('<AUID>')" -> "<aurl as server/PATH>?format=json" + */ +class AflowReader : public StructureReader { +public: + /** + * @brief Default constructor. No special authentication is required. + */ + AflowReader(); + + /** + * @brief Reads structural data by AUID. + * + * @param auid Should be of the form "aflow:XXXXXXXXXXXXXXXX". + */ + void read(const std::string &auid) override; + + /** + * @brief Returns the last retrieved structure from AFLOW. + * + * @return The structure, including cell and atoms. + */ + Structure getStructure() const override; + +private: + /// Internal storage of the retrieved structure. + Structure m_structure; + + /** + * @brief fetchAndParseAflowByAuid() queries AFLUX for the given AUID, downloads the + * JSON of the returned "aurl", and calls parseAflowJson(). + * + * @param auid The AUID string: "aflow:XXXXXXXXXXXX...".
+ */ + void fetchAndParseAflowByAuid(const std::string &auid); + + /** + * @brief httpGet() sends a GET request to the provided URL + * and collects the response body into a std::string. + * + * @param url The target endpoint. + * @return The full response body as a std::string. + */ + std::string httpGet(const std::string &url); + + /** + * @brief parseAflowJson() interprets JSON from AFLOW, looking for + * geometry, positions_cartesian, species, composition, etc. + * + * @param jsonContent Raw JSON response from AFLOW. + */ + void parseAflowJson(const std::string &jsonContent); + + /** + * @brief makeCellMatrix() constructs a 3×3 lattice from the provided + * cell parameters (a,b,c,α,β,γ in degrees). + * + * The standard crystallographic formula is used: + * a1 = ( a, 0, 0 ) + * a2 = ( b cos γ, b sin γ, 0 ) + * a3 = ( c cos β, c( cos α - cos β cos γ ) / sin γ, + * c sqrt(1 - cos^2 β - [ (cos α - cos β cos γ)/ sin γ]^2 ) ) + * + * @param a Length a in Å. + * @param b Length b in Å. + * @param c Length c in Å. + * @param alpha Angle α in degrees. + * @param beta Angle β in degrees. + * @param gamma Angle γ in degrees. + * @param cell Output 3×3 array for storing the resulting lattice vectors. + */ + void makeCellMatrix(double a, double b, double c, + double alpha, double beta, double gamma, + double (&cell)[3][3]); +}; + +#endif // AFLOW_READER_H + diff --git a/include/tadah/mlip/structure_readers/cif_reader.h b/include/tadah/mlip/structure_readers/cif_reader.h index 6488cf3eb2791e4ea2e3520e4984f622eed36483..691867c3e7582f8cc091429db331a5f6c157bbc3 100644 --- a/include/tadah/mlip/structure_readers/cif_reader.h +++ b/include/tadah/mlip/structure_readers/cif_reader.h @@ -20,14 +20,14 @@ public: void read(const std::string &path) override; Structure getStructure() const override; -private: - Structure m_structure; - /** * @brief Parses the entire CIF text, populating m_structure with cell & atoms.
*/ void parseCifContents(const std::string &contents); +private: + Structure m_structure; + /** * @brief Creates a 3×3 matrix from (a,b,c, alpha,beta,gamma). * a along x, b in xy-plane, c forms a right-handed set. diff --git a/include/tadah/mlip/structure_readers/cod_reader.h b/include/tadah/mlip/structure_readers/cod_reader.h index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..92b679db3fd5785f45b9c70b9885f37a3324df50 100644 --- a/include/tadah/mlip/structure_readers/cod_reader.h +++ b/include/tadah/mlip/structure_readers/cod_reader.h @@ -0,0 +1,62 @@ +#ifndef COD_READER_H +#define COD_READER_H + +#include <tadah/mlip/structure_readers/structure_reader.h> +#include <tadah/mlip/structure_readers/cif_reader.h> +#include <tadah/mlip/structure.h> + +#include <string> + +/** + * @brief Reads a CIF file from the Crystallography Open Database (COD) + * in memory and parses its content using an existing CifReader. + * + * Inherits from StructureReader to provide a common interface + * for reading structures from online sources. + */ +class codReader : public StructureReader { +public: + /// Default constructor + codReader() = default; + + /** + * @brief Retrieves a CIF file from the COD database by constructing + * a URL or using a direct URL and loads it into a Structure object. + * + * If @p path does not start with "http://" or "https://", the URL is built + * from a COD ID (e.g., "1534932") by appending ".cif". If ".cif" already occurs + * in @p path, the path is used unchanged. The downloaded content is parsed with CifReader. + * + * @param path A COD entry ID or a fully qualified URL. + */ + void read(const std::string &path) override; + + /** + * @brief Accesses the last successfully read Structure. + */ + Structure getStructure() const override; + +private: + /** + * @brief A helper callback function that libcurl uses to write received bytes + * into a std::string in memory.
+ */ + static size_t writeDataCallback(void* ptr, size_t size, size_t nmemb, void* userData); + + /** + * @brief Downloads data from the given URL and returns it as a single string. + * + * Uses libcurl's easy interface for making a GET request. + * + * @param url A valid URL that points to a CIF resource on COD. + * @return The complete content fetched from @p url. + */ + std::string fetchDataFromURL(const std::string &url) const; + + /** + * @brief Internal buffer for storing the last read structure. + */ + Structure m_structure; +}; + +#endif // COD_READER_H diff --git a/src/aflow_reader.cpp b/src/aflow_reader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..756a71f08c9c45bba5ff10aa4cfba7895672a83e --- /dev/null +++ b/src/aflow_reader.cpp @@ -0,0 +1,364 @@ +#include <tadah/mlip/structure_readers/aflow_reader.h> + +#include <curl/curl.h> +#include <nlohmann/json.hpp> +#include <array> +#include <stdexcept> +#include <sstream> +#include <iostream> +#include <cmath> +#include <vector> + +/* + writeCallback(ptr, size, nmemb, userdata) gathers the data returned + via cURL into an std::string. The cURL library calls this function + repeatedly to deliver chunks of the HTTP response body. +*/ +static size_t writeCallback(char* ptr, size_t size, size_t nmemb, void* userdata) { + auto* response = static_cast<std::string*>(userdata); + size_t totalBytes = size * nmemb; + response->append(ptr, totalBytes); + return totalBytes; +} + +// No special authentication or configuration is required in AflowReader. +AflowReader::AflowReader() { +} + +/* + read(auid): + Reads AFLOW data corresponding to the input AUID (e.g. "aflow:e9c6d914c4b8d9ca"), + then parses it into an internal Structure object for later retrieval. +*/ +void AflowReader::read(const std::string &auid) { + fetchAndParseAflowByAuid(auid); +} + +// getStructure(): +// Returns the most recently fetched AFLOW structure.
+Structure AflowReader::getStructure() const { + return m_structure; +} + +/* + convertAURLToURL(aurl): + Converts an AFLOW-style aurl, usually "server:PATH", + into "server/PATH". This helps build a domain/path + format recognized by HTTP requests. For example: + + "aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/BCC/Ta1_ICSD_76152" + + becomes: + + "aflowlib.duke.edu/AFLOWDATA/ICSD_WEB/BCC/Ta1_ICSD_76152". +*/ +static std::string convertAURLToURL(const std::string &aurl) { + const std::size_t pos = aurl.find(':'); + if (pos != std::string::npos) { + std::string server = aurl.substr(0, pos); + std::string path = aurl.substr(pos + 1); + return server + "/" + path; + } + return aurl; +} + +/* + fetchAndParseAflowByAuid(auid): + Issues a two-step request: + 1) Queries https://aflow.org/API/aflux/?auid('<AUID>') + -> Returns a JSON array which includes "aurl". + 2) Converts aflow "aurl" to a standard URL adding "?format=json" + -> Retrieves the final JSON describing the structure. + + Afterwards, parseAflowJson(response) populates the internal Structure. + Throws std::runtime_error if the AFLUX query returns no entry. +*/ +void AflowReader::fetchAndParseAflowByAuid(const std::string &auid) { + const std::string baseUrl = "https://aflow.org/API/aflux/?auid('"; + std::string url = baseUrl + auid + "')"; + + std::string response0 = httpGet(url); + + using json = nlohmann::json; + json root = json::parse(response0); + + // The top-level JSON is an array. We only handle the first element. + if (!root.is_array() || root.empty()) { + throw std::runtime_error( + "AflowReader::fetchAndParseAflowByAuid: no AFLOW entry found for " + auid); + } + + const json &firstObject = root[0]; + std::string aurl = firstObject.at("aurl").get<std::string>(); + aurl = convertAURLToURL(aurl) + "?format=json"; + + std::string response = httpGet(aurl); + parseAflowJson(response); +} + +/* + httpGet(url): + Performs a simple GET request using cURL, returning the + response body as a std::string. Follows redirects automatically. + Throws std::runtime_error if curl fails.
+*/ +std::string AflowReader::httpGet(const std::string &url) { + CURL* curl = curl_easy_init(); + if (!curl) { + throw std::runtime_error("AflowReader::httpGet: Failed to init cURL."); + } + + std::string response; + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response); + curl_easy_setopt(curl, CURLOPT_USERAGENT, "AflowReader/1.0"); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L); // HTTP status >= 400 is reported as a failure + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + curl_easy_cleanup(curl); + throw std::runtime_error( + "AflowReader::httpGet: " + std::string(curl_easy_strerror(res)) + ); + } + + curl_easy_cleanup(curl); + return response; +} + +/* + parseAflowJson(jsonContent): + Interprets the final AFLOW JSON, which includes + lattice parameters ("geometry"), composition, species list, + and cartesian positions. + + - The "compound" is appended to the label. + - "volume_cell", "density", and "auid" + are placed in the label for clarity. + - "geometry" is used to build a 3×3 cell matrix via makeCellMatrix(). + - "positions_cartesian" plus stoichiometry & species become the final atoms. + + If species, composition and positions are not mutually consistent, + every atom is labeled "X" as a fallback. +*/ +void AflowReader::parseAflowJson(const std::string &jsonContent) { + using json = nlohmann::json; + + json root = json::parse(jsonContent); + + //------------------------------------------------------------------ + // 1) Build a label with: compound, volume, + // density, AUID.
+ //------------------------------------------------------------------ + std::string compound = root.value("compound", "UnknownCompound"); + std::string composition = root.value("composition", "N/A"); + std::string species = root.value("species", "N/A"); + std::string volumeCell = root.value("volume_cell", "N/A"); + std::string density = root.value("density", "N/A"); + std::string auid = root.value("auid", "N/A"); + + std::ostringstream lbl; + lbl << auid << " | " << compound + << " | Volume: " << volumeCell + << " | Density: " << density; + m_structure.label = lbl.str(); + + //------------------------------------------------------------------ + // 2) Extract geometry: "a,b,c,alpha,beta,gamma". + //------------------------------------------------------------------ + double a = 1.0; + double b = 1.0; + double c = 1.0; + double alpha = 90.0; + double beta = 90.0; + double gamma = 90.0; + + if (root.contains("geometry")) { + std::string geometryStr = root["geometry"].get<std::string>(); + std::stringstream sgeom(geometryStr); + std::vector<double> vals; + while (sgeom.good()) { + if (sgeom.peek() == ',') sgeom.ignore(); + double tmp; + if (sgeom >> tmp) { + vals.push_back(tmp); + } else { + sgeom.clear(); + sgeom.ignore(); + } + } + if (vals.size() == 6) { + a = vals[0]; + b = vals[1]; + c = vals[2]; + alpha = vals[3]; + beta = vals[4]; + gamma = vals[5]; + } + } + + // Create 3×3 lattice from (a, b, c, alpha, beta, gamma) + double cellMat[3][3]; + makeCellMatrix(a, b, c, alpha, beta, gamma, cellMat); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + m_structure.cell(i, j) = cellMat[i][j]; + } + } + + //------------------------------------------------------------------ + // 3) Parse species (e.g. "Ag,K,Se,Ta"), storing each symbol in a vector.
+ //------------------------------------------------------------------ + std::vector<std::string> speciesList; + { + std::stringstream sss(species); + while (sss.good()) { + if (sss.peek() == ',') sss.ignore(); + std::string sp; + if (std::getline(sss, sp, ',')) { + speciesList.push_back(sp); + } + } + } + + //------------------------------------------------------------------ + // 4) Parse composition (e.g. "6,6,16,4") into integer stoich counts. + //------------------------------------------------------------------ + std::vector<int> stoichCount; + { + std::stringstream sst(composition); + while (sst.good()) { + if (sst.peek() == ',') sst.ignore(); + double st; + if (sst >> st) { + // Round in case of fractional + int count = static_cast<int>(std::round(st)); + stoichCount.push_back(count); + } else { + sst.clear(); + sst.ignore(); + } + } + } + + //------------------------------------------------------------------ + // 5) Acquire positions from "positions_cartesian": + // "x1,y1,z1;x2,y2,z2;..." + //------------------------------------------------------------------ + std::vector<std::array<double,3>> positions; + if (root.contains("positions_cartesian")) { + std::string posStr = root["positions_cartesian"].get<std::string>(); + std::stringstream sPos(posStr); + while (sPos.good()) { + std::string triplet; + if (!std::getline(sPos, triplet, ';')) break; + if (triplet.empty()) continue; + + std::array<double,3> coords{0,0,0}; + std::stringstream sTrip(triplet); + for (int i = 0; i < 3; ++i) { + if (sTrip.peek() == ',') sTrip.ignore(); + double val = 0.0; + if (sTrip >> val) { + coords[i] = val; + } else { + sTrip.clear(); + sTrip.ignore(); + } + } + positions.push_back(coords); + } + } + + //------------------------------------------------------------------ + // 6) Construct the final atoms for m_structure: + // if species+composition is consistent, each site gets + // the correct element symbol; otherwise, fallback "X".
+ //------------------------------------------------------------------ + m_structure.atoms.clear(); + m_structure.atoms.reserve(positions.size()); + + size_t totalCount = 0; + for (int n : stoichCount) totalCount += (n > 0 ? static_cast<size_t>(n) : 0); + + bool consistent = (!speciesList.empty() + && speciesList.size() == stoichCount.size() + && totalCount == positions.size()); + + if (!consistent) { + // If mismatch, label everything "X". + for (auto &coord : positions) { + Atom atom; + auto unknown = PeriodicTable().find_by_symbol("X"); + static_cast<Element &>(atom) = unknown; + atom.position[0] = coord[0]; + atom.position[1] = coord[1]; + atom.position[2] = coord[2]; + m_structure.atoms.push_back(atom); + } + return; + } + + // Distribute species among positions in order; list sizes and the total were checked above. + size_t idxPos = 0; + for (size_t iSpec = 0; iSpec < speciesList.size(); ++iSpec) { + Element e = PeriodicTable().find_by_symbol(speciesList[iSpec]); + for (int rep = 0; rep < stoichCount[iSpec]; ++rep) { + Atom atom; + static_cast<Element &>(atom) = e; + atom.position[0] = positions[idxPos][0]; + atom.position[1] = positions[idxPos][1]; + atom.position[2] = positions[idxPos][2]; + m_structure.atoms.push_back(atom); + ++idxPos; + } + } +} + +/* + makeCellMatrix(): + Builds the standard 3×3 lattice vectors from: + - a,b,c (lattice lengths) + - alpha, beta, gamma (lattice angles in degrees) + + The formula is standard: + R0 = ( a, 0, 0 ) + R1 = ( b cos(γ), b sin(γ), 0 ) + R2 = ( c cos(β), c [cos(α) − cos(β) cos(γ)] / sin(γ), + c sqrt( 1 − cos²(β) − ... ) ) + + The final line for c is chosen so the angles are satisfied.
+*/ +void AflowReader::makeCellMatrix(double a, double b, double c, + double alphaDeg, double betaDeg, double gammaDeg, + double (&cell)[3][3]) { + // Angles arrive in degrees; the trigonometric functions need radians. + const double alpha = alphaDeg * M_PI / 180.0; + const double beta = betaDeg * M_PI / 180.0; + const double gamma = gammaDeg * M_PI / 180.0; + + const double cosAlpha = std::cos(alpha); + const double cosBeta = std::cos(beta); + const double cosGamma = std::cos(gamma); + const double sinGamma = std::sin(gamma); + + // y-direction factor shared by cell[2][1] and cell[2][2]. sinGamma is not + // guarded: gamma near 0 or 180 degrees makes the divisor vanish. + const double yFactor = (cosAlpha - cosBeta * cosGamma) / sinGamma; + + cell[0][0] = a; + cell[0][1] = 0.0; + cell[0][2] = 0.0; + + cell[1][0] = b * cosGamma; + cell[1][1] = b * sinGamma; + cell[1][2] = 0.0; + + // Clamp at zero so rounding noise cannot hand sqrt a negative argument. + const double term = 1.0 - cosBeta * cosBeta - yFactor * yFactor; + cell[2][0] = c * cosBeta; + cell[2][1] = c * yFactor; + cell[2][2] = c * std::sqrt(term > 0.0 ? term : 0.0); +} + diff --git a/src/cif_reader.cpp b/src/cif_reader.cpp index 1cba6f233128def86aa3101faddd9079a91365a1..5fb292aea2fd37a19ea5bf774116bfb4e2710781 100644 --- a/src/cif_reader.cpp +++ b/src/cif_reader.cpp @@ -15,12 +15,13 @@ void CifReader::read(const std::string &path) { throw std::runtime_error("CifReader::read: Cannot open " + path); } + // Label the source + m_structure.label = "CIF file: " + path; + std::ostringstream buffer; buffer << ifs.rdbuf(); parseCifContents(buffer.str()); - // Label the source - m_structure.label = "CIF file: " + path; } Structure CifReader::getStructure() const { return m_structure; } @@ -42,6 +43,11 @@ void CifReader::parseCifContents(const std::string &fileContents) { auto sites = struc.get_all_unit_cell_sites(); + auto volume = struc.cell.volume; + auto symmetry = struc.spacegroup_hm; + m_structure.label += " | Symmetry: " + symmetry; + m_structure.label += " | Volume: " + std::to_string(volume); + m_structure.atoms.reserve(sites.size()); for (const auto &site : sites) { auto p = struc.cell.orthogonalize(site.fract); diff --git a/src/cod_reader.cpp b/src/cod_reader.cpp new
file mode 100644 index 0000000000000000000000000000000000000000..226b6090af34be83651d9eb60b7066f4f2c44633 --- /dev/null +++ b/src/cod_reader.cpp @@ -0,0 +1,89 @@ +#include <tadah/mlip/structure_readers/cod_reader.h> + +#include <curl/curl.h> +#include <stdexcept> +#include <string> + +/* + writeDataCallback() is used by libcurl to write received bytes + into a std::string pointed to by userData. +*/ +size_t codReader::writeDataCallback(void* ptr, size_t size, size_t nmemb, void* userData) { + size_t totalSize = size * nmemb; + auto* str = static_cast<std::string*>(userData); + str->append(static_cast<char*>(ptr), totalSize); + return totalSize; +} + +/* + read() determines whether the given path is a URL or a COD entry ID, + constructs the proper URL if needed, then downloads the CIF data as a string. + The data is parsed using an existing CifReader, and stored in m_structure. +*/ +void codReader::read(const std::string &path) { + // Determines final URL to query; only a leading scheme marks the path as a URL + std::string codUrl; + if (path.rfind("http://", 0) != 0 && + path.rfind("https://", 0) != 0) { + // If ".cif" does not occur in the ID, append it + if (path.find(".cif") == std::string::npos) { + codUrl = "https://www.crystallography.net/cod/" + path + ".cif"; + } else { + codUrl = "https://www.crystallography.net/cod/" + path; + } + } else { + // Already a valid URL + codUrl = path; + } + + // Uses libcurl to download the CIF data as a string + const std::string cifData = fetchDataFromURL(codUrl); + + // Uses existing CifReader to parse the CIF data + CifReader creader; + creader.parseCifContents(cifData); + + // Retrieves the parsed Structure + m_structure = creader.getStructure(); + m_structure.label.insert(0,"COD entry: " + path); +} + +/* + getStructure() returns the most recently read structure for usage + elsewhere in the codebase.
+*/ +Structure codReader::getStructure() const { + return m_structure; +} + +/* + fetchDataFromURL() configures libcurl to make an HTTP GET request + and returns the response body; throws std::runtime_error on failure. +*/ +std::string codReader::fetchDataFromURL(const std::string &url) const { + CURL* curlHandle = curl_easy_init(); + if (!curlHandle) { + throw std::runtime_error("Failed to initialize libcurl."); + } + + // Sets the URL, follows any redirection, and fails on HTTP status >= 400 + curl_easy_setopt(curlHandle, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curlHandle, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(curlHandle, CURLOPT_FAILONERROR, 1L); + // Prepares string buffer for collected data + std::string buffer; + curl_easy_setopt(curlHandle, CURLOPT_WRITEFUNCTION, writeDataCallback); + curl_easy_setopt(curlHandle, CURLOPT_WRITEDATA, &buffer); + + // Executes the request + CURLcode result = curl_easy_perform(curlHandle); + curl_easy_cleanup(curlHandle); + + // Checks for errors during transfer + if (result != CURLE_OK) { + throw std::runtime_error(std::string("Data transfer failed: ") + + curl_easy_strerror(result)); + } + return buffer; +} + diff --git a/src/structure_reader_selector.cpp b/src/structure_reader_selector.cpp index 3a4dcc45cdb69b44f2f4ab09f6c6a4136a63639e..1ce8bfd539115698905190bbf19016abc1e9e445 100644 --- a/src/structure_reader_selector.cpp +++ b/src/structure_reader_selector.cpp @@ -3,8 +3,8 @@ #include <tadah/mlip/structure_readers/structure_reader_selector.h> #include <tadah/mlip/structure_readers/vasp_poscar_reader.h> #include <tadah/mlip/structure_readers/materials_project_reader.h> -/*#include <tadah/mlip/structure_readers/cod_reader.h>*/ -/*#include <tadah/mlip/structure_readers/aflow_reader.h>*/ +#include <tadah/mlip/structure_readers/cod_reader.h> +#include <tadah/mlip/structure_readers/aflow_reader.h> #include <tadah/mlip/key_storage/key_manager.h> #include <tadah/mlip/structure_readers/nomad_reader.h> @@ -33,12 +33,12 @@ StructureReaderSelector::getReader(const std::string &pathOrId) { } else if (format == "CELL") { return
std::make_unique<CastepCellReader>(); } - /*else if (format == "COD") {*/ - /* return std::make_unique<CODReader>();*/ - /*}*/ - /*else if (format == "AFLOW") {*/ - /* return std::make_unique<AflowReader>();*/ - /*}*/ + else if (format == "COD") { + return std::make_unique<codReader>(); + } + else if (format == "AFLOW") { + return std::make_unique<AflowReader>(); + } else if (format == "NOMAD") { return std::make_unique<NomadReader>(); } else {