From cecc436292ce7c03ad710d46099790bb38dbcf71 Mon Sep 17 00:00:00 2001
From: Marcin Kirsz <mkirsz@ed.ac.uk>
Date: Fri, 6 Dec 2024 16:18:19 +0000
Subject: [PATCH] Castep/VASP/LAMMPS data writers, including factory

---
 .../dataset_readers/dataset_reader_selector.h |  4 +-
 .../mlip/dataset_writers/castep_cell_writer.h | 23 ++++++
 .../mlip/dataset_writers/dataset_writer.h     | 23 ++++++
 .../dataset_writers/dataset_writer_selector.h | 15 ++++
 .../dataset_writers/lammps_structure_writer.h | 21 +++++
 .../mlip/dataset_writers/vasp_poscar_writer.h | 21 +++++
 src/castep_cell_writer.cpp                    | 53 ++++++++++++
 src/dataset_writer_selector.cpp               | 23 ++++++
 src/lammps_structure_writer.cpp               | 81 +++++++++++++++++++
 src/vasp_poscar_writer.cpp                    | 79 ++++++++++++++++++
 10 files changed, 341 insertions(+), 2 deletions(-)
 create mode 100644 include/tadah/mlip/dataset_writers/castep_cell_writer.h
 create mode 100644 include/tadah/mlip/dataset_writers/dataset_writer.h
 create mode 100644 include/tadah/mlip/dataset_writers/dataset_writer_selector.h
 create mode 100644 include/tadah/mlip/dataset_writers/lammps_structure_writer.h
 create mode 100644 include/tadah/mlip/dataset_writers/vasp_poscar_writer.h
 create mode 100644 src/castep_cell_writer.cpp
 create mode 100644 src/dataset_writer_selector.cpp
 create mode 100644 src/lammps_structure_writer.cpp
 create mode 100644 src/vasp_poscar_writer.cpp

diff --git a/include/tadah/mlip/dataset_readers/dataset_reader_selector.h b/include/tadah/mlip/dataset_readers/dataset_reader_selector.h
index 9c4808b..a09803e 100644
--- a/include/tadah/mlip/dataset_readers/dataset_reader_selector.h
+++ b/include/tadah/mlip/dataset_readers/dataset_reader_selector.h
@@ -15,11 +15,11 @@ public:
   /**
    * @brief Factory method to create specific DatasetReader objects.
    *
-   * @param type The type of the dataset reader to create.
+   * @param filepath Path to the file whose content is checked.
    * @param db Reference to a StructureDB object to store parsed data.
    * @return A unique pointer to a DatasetReader object.
    */
-  static std::unique_ptr<DatasetReader> get_reader(const std::string& type, StructureDB& db);
+  static std::unique_ptr<DatasetReader> get_reader(const std::string& filepath, StructureDB& db);
 
   /**
    * @brief Determines the file type based on its content.
diff --git a/include/tadah/mlip/dataset_writers/castep_cell_writer.h b/include/tadah/mlip/dataset_writers/castep_cell_writer.h
new file mode 100644
index 0000000..90b799c
--- /dev/null
+++ b/include/tadah/mlip/dataset_writers/castep_cell_writer.h
@@ -0,0 +1,41 @@
+#ifndef CASTEP_CELL_WRITER_H
+#define CASTEP_CELL_WRITER_H
+
+#include <tadah/mlip/structure.h>
+#include <tadah/mlip/structure_db.h>
+#include <tadah/mlip/dataset_writers/dataset_writer.h>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <stdexcept>
+
+/**
+ * @brief Writes one structure from a StructureDB in CASTEP cell format.
+ *
+ * The file holds the structure label as a comment line, a LATTICE_CART
+ * block tagged with the ANG unit keyword, and a POSITIONS_ABS block.
+ */
+class CastepCellWriter : public DatasetWriter {
+public:
+  /**
+   * @brief Bind the writer to a structure database.
+   *
+   * @param db Database the structures are read from.
+   */
+  explicit CastepCellWriter(StructureDB& db);
+
+  /**
+   * @brief Write the i-th structure of the database to a file.
+   *
+   * @param filename Path of the output file.
+   * @param i Index of the structure in the database.
+   * @throws std::out_of_range if i is not smaller than the database size.
+   * @throws std::runtime_error if the file cannot be opened.
+   */
+  void write_data(const std::string& filename, const size_t i) override;
+};
+
+#endif // CASTEP_CELL_WRITER_H
diff --git a/include/tadah/mlip/dataset_writers/dataset_writer.h b/include/tadah/mlip/dataset_writers/dataset_writer.h
new file mode 100644
index 0000000..91ff858
--- /dev/null
+++ b/include/tadah/mlip/dataset_writers/dataset_writer.h
@@ -0,0 +1,51 @@
+#ifndef DATASET_WRITER_H
+#define DATASET_WRITER_H
+
+#include <tadah/mlip/structure_db.h>
+#include <string>
+#include <vector>
+
+/**
+ * @brief Abstract base class for writers that export one structure from a
+ *        StructureDB to a file.
+ *
+ * Holds a reference to the database together with the number formatting
+ * settings (precision and column width) available to concrete writers.
+ */
+class DatasetWriter {
+  public:
+    virtual ~DatasetWriter() = default;
+
+    /**
+     * @brief Write the i-th structure of the database to a file.
+     *
+     * @param filename Path of the output file.
+     * @param i Index of the structure in the database.
+     */
+    virtual void write_data(const std::string& filename, const size_t i) = 0;
+
+    /**
+     * @brief Bind the writer to a structure database.
+     *
+     * @param db Database to read structures from. Only a reference is
+     *           stored, so db must outlive the writer.
+     */
+    explicit DatasetWriter(StructureDB& db) : stdb(db) {}
+
+    /**
+     * @brief Set the output precision; the column width becomes _p + 6.
+     *
+     * @param _p Precision later handed to std::setprecision by the writers.
+     */
+    virtual void set_precision(const size_t _p) {
+      p = static_cast<int>(_p);
+      w = p + 6;
+    }
+
+  protected:
+    StructureDB& stdb;
+    // Kept as int because std::setprecision and std::setw take an int.
+    int p = 10;     // output precision
+    int w = p + 6;  // column width
+};
+#endif // DATASET_WRITER_H
diff --git a/include/tadah/mlip/dataset_writers/dataset_writer_selector.h b/include/tadah/mlip/dataset_writers/dataset_writer_selector.h
new file mode 100644
index 0000000..7ea4f85
--- /dev/null
+++ b/include/tadah/mlip/dataset_writers/dataset_writer_selector.h
@@ -0,0 +1,25 @@
+#ifndef DATASET_WRITER_SELECTOR_H
+#define DATASET_WRITER_SELECTOR_H
+
+#include <tadah/mlip/structure_db.h>
+#include <tadah/mlip/dataset_writers/dataset_writer.h>
+#include <string>
+#include <memory>
+
+/**
+ * @brief Factory that selects a DatasetWriter implementation by name.
+ */
+class DatasetWriterSelector {
+  public:
+    /**
+     * @brief Factory method to create specific DatasetWriter objects.
+     *
+     * @param type Output format name: "CASTEP", "VASP" or "LAMMPS".
+     * @param db Reference to the StructureDB the writer reads from.
+     * @return A unique pointer to the writer, or nullptr (after a message
+     *         on std::cerr) when type is not recognised.
+     */
+    static std::unique_ptr<DatasetWriter> get_writer(const std::string& type, StructureDB& db);
+};
+
+#endif // DATASET_WRITER_SELECTOR_H
diff --git a/include/tadah/mlip/dataset_writers/lammps_structure_writer.h b/include/tadah/mlip/dataset_writers/lammps_structure_writer.h
new file mode 100644
index 0000000..5ee539a
--- /dev/null
+++ b/include/tadah/mlip/dataset_writers/lammps_structure_writer.h
@@ -0,0 +1,41 @@
+#ifndef LAMMPS_STRUCTURE_WRITER_H
+#define LAMMPS_STRUCTURE_WRITER_H
+
+#include <tadah/mlip/structure.h>
+#include <tadah/mlip/structure_db.h>
+#include <tadah/mlip/dataset_writers/dataset_writer.h>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <stdexcept>
+
+/**
+ * @brief Writes one structure from a StructureDB as a LAMMPS data file.
+ *
+ * The box is given in the general triclinic form (avec, bvec, cvec and
+ * abc origin) and the atoms are listed in the "atomic" style.
+ */
+class LammpsStructureWriter : public DatasetWriter {
+public:
+  /**
+   * @brief Bind the writer to a structure database.
+   *
+   * @param db Database the structures are read from.
+   */
+  explicit LammpsStructureWriter(StructureDB& db);
+
+  /**
+   * @brief Write the i-th structure of the database to a file.
+   *
+   * @param filename Path of the output file.
+   * @param i Index of the structure in the database.
+   * @throws std::out_of_range if i is not smaller than the database size.
+   * @throws std::runtime_error if the file cannot be opened.
+   */
+  void write_data(const std::string& filename, const size_t i) override;
+};
+
+#endif // LAMMPS_STRUCTURE_WRITER_H
diff --git a/include/tadah/mlip/dataset_writers/vasp_poscar_writer.h b/include/tadah/mlip/dataset_writers/vasp_poscar_writer.h
new file mode 100644
index 0000000..abd58b8
--- /dev/null
+++ b/include/tadah/mlip/dataset_writers/vasp_poscar_writer.h
@@ -0,0 +1,41 @@
+#ifndef VASP_POSCAR_WRITER_H
+#define VASP_POSCAR_WRITER_H
+
+#include <tadah/mlip/structure.h>
+#include <tadah/mlip/structure_db.h>
+#include <tadah/mlip/dataset_writers/dataset_writer.h>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <stdexcept>
+
+/**
+ * @brief Writes one structure from a StructureDB as a VASP POSCAR file.
+ *
+ * The file holds the structure label, a scaling factor of 1.0, the cell,
+ * the element symbols with their atom counts, and Cartesian positions.
+ */
+class VaspPoscarWriter : public DatasetWriter {
+public:
+  /**
+   * @brief Bind the writer to a structure database.
+   *
+   * @param db Database the structures are read from.
+   */
+  explicit VaspPoscarWriter(StructureDB& db);
+
+  /**
+   * @brief Write the i-th structure of the database to a file.
+   *
+   * @param filename Path of the output file.
+   * @param i Index of the structure in the database.
+   * @throws std::out_of_range if i is not smaller than the database size.
+   * @throws std::runtime_error if the file cannot be opened.
+   */
+  void write_data(const std::string& filename, const size_t i) override;
+};
+
+#endif // VASP_POSCAR_WRITER_H
diff --git a/src/castep_cell_writer.cpp b/src/castep_cell_writer.cpp
new file mode 100644
index 0000000..144da27
--- /dev/null
+++ b/src/castep_cell_writer.cpp
@@ -0,0 +1,71 @@
+#include <tadah/mlip/structure.h>
+#include <tadah/mlip/structure_db.h>
+#include <tadah/mlip/dataset_writers/castep_cell_writer.h>
+
+#include <iomanip>
+
+CastepCellWriter::CastepCellWriter(StructureDB& db) : DatasetWriter(db) {}
+
+/**
+ * @brief Write structure i of the database as a CASTEP cell file.
+ *
+ * @param filename Path of the output file.
+ * @param i Index of the structure in the database.
+ * @throws std::out_of_range if i is not smaller than the database size.
+ * @throws std::runtime_error if the file cannot be opened or written.
+ */
+void CastepCellWriter::write_data(const std::string& filename, const size_t i) {
+
+  if (i >= stdb.size()) {
+    throw std::out_of_range("Index i is out of range.");
+  }
+
+  std::ofstream file(filename);
+  if (!file.is_open()) {
+    throw std::runtime_error("Could not open the file: " + filename);
+  }
+
+  const Structure &st = stdb(i);
+
+  // std::setw and std::setprecision expect int arguments.
+  const int width = static_cast<int>(w);
+  const int precision = static_cast<int>(p);
+
+  // write label
+  file << "# " << st.label << '\n';
+
+  // Alignment, notation and precision persist on the stream; only the
+  // field width has to be set again before every value.
+  file << std::right << std::fixed << std::setprecision(precision);
+
+  // write cell
+  file << "%BLOCK LATTICE_CART\n";
+  file << "ANG\n";
+  for (int row = 0; row < 3; ++row) {
+    for (int col = 0; col < 3; ++col) {
+      file << std::setw(width) << st.cell(row, col);
+    }
+    file << '\n';
+  }
+  file << "%ENDBLOCK LATTICE_CART\n";
+
+  file << '\n';
+
+  // write atoms: symbol followed by the three position components
+  file << "%BLOCK POSITIONS_ABS\n";
+  for (const auto &atom : st) {
+    file << std::setw(width) << atom.symbol;
+    for (int k = 0; k < 3; ++k) {
+      file << std::setw(width) << atom.position(k);
+    }
+    file << '\n';
+  }
+  // No trailing whitespace after the block terminator.
+  file << "%ENDBLOCK POSITIONS_ABS\n";
+
+  // close() flushes; a failed flush or earlier write leaves the stream bad.
+  file.close();
+  if (!file) {
+    throw std::runtime_error("Could not write the file: " + filename);
+  }
+}
diff --git a/src/dataset_writer_selector.cpp b/src/dataset_writer_selector.cpp
new file mode 100644
index 0000000..0be08eb
--- /dev/null
+++ b/src/dataset_writer_selector.cpp
@@ -0,0 +1,25 @@
+#include <tadah/mlip/dataset_writers/dataset_writer_selector.h>
+#include <tadah/mlip/dataset_writers/castep_cell_writer.h>
+#include <tadah/mlip/dataset_writers/vasp_poscar_writer.h>
+#include <tadah/mlip/dataset_writers/lammps_structure_writer.h>
+
+#include <fstream>
+#include <iostream>
+
+// Maps a format name to the matching writer; the comparison is exact
+// (case-sensitive). Unknown names are reported on std::cerr and yield nullptr.
+std::unique_ptr<DatasetWriter> DatasetWriterSelector::get_writer(const std::string& type, StructureDB& db) {
+
+  if (type == "CASTEP") {
+    return std::make_unique<CastepCellWriter>(db);
+  }
+  if (type == "VASP") {
+    return std::make_unique<VaspPoscarWriter>(db);
+  }
+  if (type == "LAMMPS") {
+    return std::make_unique<LammpsStructureWriter>(db);
+  }
+
+  std::cerr << "Unknown type! Returning nullptr." << std::endl;
+  return nullptr;
+}
diff --git a/src/lammps_structure_writer.cpp b/src/lammps_structure_writer.cpp
new file mode 100644
index 0000000..0c37059
--- /dev/null
+++ b/src/lammps_structure_writer.cpp
@@ -0,0 +1,94 @@
+#include <tadah/mlip/structure.h>
+#include <tadah/mlip/structure_db.h>
+#include <tadah/mlip/dataset_writers/lammps_structure_writer.h>
+
+#include <iomanip>
+#include <map>
+#include <string>
+
+LammpsStructureWriter::LammpsStructureWriter(StructureDB& db) : DatasetWriter(db) {}
+
+/**
+ * @brief Write structure i of the database as a LAMMPS data file.
+ *
+ * The box is written in the general triclinic form (avec, bvec, cvec and
+ * abc origin) and the atoms in the "atomic" style: atom-ID, atom-type, x, y, z.
+ * Atom types are numbered from 1 in the order in which each symbol first
+ * appears in the structure.
+ *
+ * @param filename Path of the output file.
+ * @param i Index of the structure in the database.
+ * @throws std::out_of_range if i is not smaller than the database size.
+ * @throws std::runtime_error if the file cannot be opened or written.
+ */
+void LammpsStructureWriter::write_data(const std::string& filename, const size_t i) {
+
+  if (i >= stdb.size()) {
+    throw std::out_of_range("Index i is out of range.");
+  }
+
+  std::ofstream file(filename);
+  if (!file.is_open()) {
+    throw std::runtime_error("Could not open the file: " + filename);
+  }
+
+  const Structure &st = stdb(i);
+
+  // Map every symbol to its 1-based LAMMPS type in order of first appearance.
+  // emplace leaves an existing entry untouched, so repeats keep their type.
+  std::map<std::string, size_t> type;
+  for (const auto &atom : st) {
+    type.emplace(atom.symbol, type.size() + 1);
+  }
+
+  // std::setw and std::setprecision expect int arguments.
+  const int width = static_cast<int>(w);
+  const int precision = static_cast<int>(p);
+
+  // BEGIN OF LAMMPS HEADER
+  file << st.label << '\n';
+  file << '\n';
+  file << st.natoms() << " atoms\n";
+  file << type.size() << " atom types\n";
+  file << '\n';
+
+  // Alignment, notation and precision persist on the stream; only the
+  // field width has to be set again before every value.
+  file << std::right << std::fixed << std::setprecision(precision);
+
+  // General triclinic box format
+  const char* const box_labels[3] = {"avec", "bvec", "cvec"};
+  for (int row = 0; row < 3; ++row) {
+    for (int col = 0; col < 3; ++col) {
+      file << std::setw(width) << st.cell(row, col);
+    }
+    file << "    " << box_labels[row] << '\n';
+  }
+  file << "    0.0 0.0 0.0 abc origin\n";
+  // END LAMMPS HEADER
+
+  // BEGIN LAMMPS BODY
+  file << '\n';
+  file << "Atoms # atomic\n";
+  file << '\n';
+  // atomic: atom-ID [int, 1-Natoms]    atom-type [int, 1-Ntype]    x y z
+
+  size_t idx = 1;
+  for (const auto &atom : st) {
+    file << std::setw(width) << idx;
+    // at() is a pure lookup; every symbol was registered above.
+    file << std::setw(width) << type.at(atom.symbol);
+    for (int k = 0; k < 3; ++k) {
+      file << std::setw(width) << atom.position(k);
+    }
+    file << '\n';
+    ++idx;
+  }
+  // END LAMMPS BODY
+
+  // close() flushes; a failed flush or earlier write leaves the stream bad.
+  file.close();
+  if (!file) {
+    throw std::runtime_error("Could not write the file: " + filename);
+  }
+}
diff --git a/src/vasp_poscar_writer.cpp b/src/vasp_poscar_writer.cpp
new file mode 100644
index 0000000..3314418
--- /dev/null
+++ b/src/vasp_poscar_writer.cpp
@@ -0,0 +1,96 @@
+#include <tadah/mlip/structure.h>
+#include <tadah/mlip/structure_db.h>
+#include <tadah/mlip/dataset_writers/vasp_poscar_writer.h>
+
+#include <fstream>
+#include <iomanip>
+#include <map>
+#include <string>
+
+VaspPoscarWriter::VaspPoscarWriter(StructureDB& db) : DatasetWriter(db) {}
+
+/**
+ * @brief Write structure i of the database as a VASP POSCAR file.
+ *
+ * Layout: label line, scaling factor 1.0, three cell rows, element symbols,
+ * number of atoms per element, "Cartesian", then the positions grouped by
+ * element in the order of the symbol line (symbols sorted as std::map keys).
+ *
+ * @param filename Path of the output file.
+ * @param i Index of the structure in the database.
+ * @throws std::out_of_range if i is not smaller than the database size.
+ * @throws std::runtime_error if the file cannot be opened or written.
+ */
+void VaspPoscarWriter::write_data(const std::string& filename, const size_t i) {
+
+  if (i >= stdb.size()) {
+    throw std::out_of_range("Index i is out of range.");
+  }
+
+  std::ofstream file(filename);
+  if (!file.is_open()) {
+    throw std::runtime_error("Could not open the file: " + filename);
+  }
+
+  const Structure &st = stdb(i);
+
+  // std::setw and std::setprecision expect int arguments.
+  const int width = static_cast<int>(w);
+  const int precision = static_cast<int>(p);
+
+  // write label (first line of the file) and scaling factor
+  file << st.label << '\n';
+  file << "1.0\n";
+
+  // Alignment, notation and precision persist on the stream; only the
+  // field width has to be set again before every value.
+  file << std::right << std::fixed << std::setprecision(precision);
+
+  // write cell
+  for (int row = 0; row < 3; ++row) {
+    for (int col = 0; col < 3; ++col) {
+      file << std::setw(width) << st.cell(row, col);
+    }
+    file << '\n';
+  }
+
+  // Count the atoms of every element. operator[] value-initialises a new
+  // counter to zero, and std::map keeps the symbols sorted.
+  std::map<std::string, size_t> nelements;
+  for (const auto &atom : st) {
+    ++nelements[atom.symbol];
+  }
+
+  // write elements
+  for (const auto& pair : nelements) {
+    file << pair.first << " ";
+  }
+  file << '\n';
+
+  // write number of atoms of every element
+  for (const auto& pair : nelements) {
+    file << pair.second << " ";
+  }
+  file << '\n';
+
+  file << "Cartesian\n";
+
+  // Positions are emitted element by element so that they follow the
+  // order of the symbol and count lines written above.
+  for (const auto& pair : nelements) {
+    for (const auto &atom : st) {
+      if (pair.first == atom.symbol) {
+        for (int k = 0; k < 3; ++k) {
+          file << std::setw(width) << atom.position(k);
+        }
+        file << '\n';
+      }
+    }
+  }
+
+  // close() flushes; a failed flush or earlier write leaves the stream bad.
+  file.close();
+  if (!file) {
+    throw std::runtime_error("Could not write the file: " + filename);
+  }
+}
-- 
GitLab