diff --git a/include/tadah/mlip/structure_db.h b/include/tadah/mlip/structure_db.h
index a0bc3443cb14b5e467ed64e5a77f4016c6ce1be5..07d2c23b99af4e4c84cb274bbfb9567a1e88aae2 100644
--- a/include/tadah/mlip/structure_db.h
+++ b/include/tadah/mlip/structure_db.h
@@ -154,4 +154,3 @@ private:
 } // namespace tadah
 
 #endif // TADAH_MLIP_STRUCTURE_DB_H
-
diff --git a/src/structure.cpp b/src/structure.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ee3ca19c640ae715b5e7d92070e5cebadf3e023d
--- /dev/null
+++ b/src/structure.cpp
@@ -0,0 +1,354 @@
+#include <tadah/mlip/structure.h>
+#include <tadah/core/periodic_table.h>
+
+#include <stdexcept>
+#include <cmath>
+
+Structure::Structure() {
+  PeriodicTable::initialize();
+}
+
+const Atom &Structure::operator()(const size_t i) const{
+  return atoms[i];
+}
+
+void Structure::add_atom(Atom a) {
+  atoms.push_back(a);
+}
+void Structure::remove_atom(const size_t i) {
+  atoms.erase(atoms.begin()+i);
+}
+
+Vec3d Structure::nn_pos(const size_t i, const size_t n) const
+{
+    // (A) Global index of this neighbor
+    const size_t neighborIndex = near_neigh_idx[i][n];
+
+    // (B) Atom's original "unshifted" position:
+    const Vec3d &posNeighbor = atoms[neighborIndex].position;
+
+    // (C) Convert the stored shift -> real displacement shiftDisp
+    //     If near_neigh_shift[i][n] is an integer triple (n1, n2, n3),
+    //     multiply by the cell. Otherwise, if it's already in real space,
+    //     you can just do: Vec3d shiftDisp = near_neigh_shift[i][n].
+    Vec3d shift = near_neigh_shift[i][n]; // might be (n1, n2, n3)
+
+    // If shift is integer triple (n1, n2, n3), multiply with cell:
+    Vec3d shiftDisp;
+    shiftDisp[0] = shift[0]*cell(0,0) + shift[1]*cell(0,1) + shift[2]*cell(0,2);
+    shiftDisp[1] = shift[0]*cell(1,0) + shift[1]*cell(1,1) + shift[2]*cell(1,2);
+    shiftDisp[2] = shift[0]*cell(2,0) + shift[1]*cell(2,1) + shift[2]*cell(2,2);
+
+    // (D) Final neighbor position = unshifted position + shiftDisp
+    Vec3d posShifted = posNeighbor + shiftDisp;
+    return posShifted;
+}
+
+size_t Structure::natoms() const {
+  return atoms.size();
+}
+
+size_t Structure::nn_size(size_t i) const {
+  return near_neigh_idx[i].size();
+}
+
+int Structure::read(std::ifstream &ifs) {
+  std::string line;
+  // ignore extra empty lines
+  std::getline(ifs,line);
+  if(line.empty()) return 1;
+
+  // OK, process structural data
+  // read first line as a label
+  label = line;
+
+  // the second line could be energy or
+  // a scalling factors eweight fweight sweight
+  std::getline(ifs,line);
+  std::stringstream stream(line);
+  std::string temp;
+  size_t count = 0;
+  while(stream >> temp) { ++count;}
+
+  if (count == 3) {
+    stream.clear();
+    stream.seekg(0, std::ios::beg);
+    stream >> eweight >> fweight >> sweight;
+    std::getline(ifs,line);
+    std::stringstream stream2(line);
+    // energy and T
+    stream2 >> energy;
+    if(stream2) stream2 >> T;
+  }
+  else if (count == 2) {
+    stream.clear();
+    stream.seekg(0, std::ios::beg);
+    // energy and T
+    stream >> energy >> T;
+  }
+  else {
+    energy = std::stod(line);
+  }
+
+  // 3 lines, 9 elements are for the cell matrix
+  for (int i=0; i<3; ++i)
+    for (int j=0; j<3; ++j)
+      ifs >> cell(i,j);
+
+  // 3 lines, 9 elements are for the stress matrix
+  // if (use_stress)
+  for (int i=0; i<3; ++i)
+    for (int j=0; j<3; ++j)
+      ifs >> stress(i,j);
+
+  // move to next line
+  std::getline(ifs,line);
+
+  // make sure atoms vector is empty
+  atoms.clear();
+
+  char symbol[3];
+  double px,py,pz,fx,fy,fz;
+  while(std::getline(ifs,line)) {
+    if(line.empty()) break;
+    //if(line == " ") break;
+    if(line == "\r") break;     // detects windows newline
+
+    std::istringstream tmp(line);
+    tmp >> symbol >> px >> py >> pz >> fx >> fy >> fz;
+    Element e = PeriodicTable::find_by_symbol(symbol);
+    atoms.push_back(Atom(e,px,py,pz,fx,fy,fz));
+    //Atom &a = atoms.back();
+
+    //tmp >> a.label >> a.position(0) >> a.position(1) >> a.position(2);
+    // if (use_force)
+    //tmp >> a.force(0) >> a.force(1) >> a.force(2);
+    //std::cout <<  a.force(0) << " " <<  a.force(1) << " " <<  a.force(2) << std::endl;;
+  }
+  return 0;
+}
+
+void Structure::read(const std::string fn) {
+  std::ifstream ifs(fn);
+  if (!ifs.is_open()) {
+    throw std::runtime_error("File does not exist: "+fn);
+  }
+  read(ifs);
+  ifs.close();
+}
+
+void Structure::save(std::ofstream &ofs) const {
+  ofs << label << std::endl;
+  ofs << eweight << " " << fweight << " " << sweight << std::endl;
+  ofs << energy << std::endl;
+  for (int i=0; i<3; ++i) {
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,0);
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,1);
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,2);
+    ofs << std::endl;
+  }
+
+  for (int i=0; i<3; ++i) {
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,0);
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,1);
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,2);
+    ofs << std::endl;
+  }
+
+  for (const Atom &a:atoms) {
+    ofs << a.symbol[0] << a.symbol[1];
+    ofs << std::setw(w-2) << std::fixed << std::right
+      << std::setprecision(p) << a.position(0);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.position(1);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.position(2);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.force(0);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.force(1);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.force(1);
+    ofs << std::endl;
+  }
+}
+
+void Structure::save(const std::string fn) const {
+  std::ofstream ofs(fn);
+  save(ofs);
+  ofs.close();
+}
+size_t Structure::get_nn_iindex(const size_t i, const size_t j, const size_t jj) const {
+  Vec3d shift_ijj = -near_neigh_shift[i][jj];
+  size_t ii = 0;
+
+  for (size_t x=0; x<near_neigh_idx[j].size(); ++x) {
+    if (near_neigh_idx[j][x] == i)
+      if (near_neigh_shift[j][ii] == shift_ijj)
+        break;
+    ii++;
+  }
+  return ii;
+
+}
+double Structure::get_volume() const {
+  return cell.row(0)*(cell.row(1).cross(cell.row(2)));
+}
+double Structure::get_density() const {
+  double V = get_volume();
+  V*=1e-24; // convert to cm^3
+  double amu = 1.66053906660e-24; // g
+  double mass = 0;
+  for (const auto& a:atoms) mass += PeriodicTable::get_mass(a.Z);
+  return amu*mass/V;
+}
+
+double Structure::get_temperature() const {
+  return T;
+}
+
+double Structure::get_virial_pressure() const {
+  return stress.trace()/get_volume()/3;
+}
+
+double Structure::get_pressure(double T, double kB) const {
+  double vpress = get_virial_pressure();
+  return vpress + natoms()*kB*T/get_volume();
+}
+bool Structure::operator==(const Structure& st) const
+{
+  double EPSILON = std::numeric_limits<double>::epsilon();
+  bool result =
+    cell.isApprox(st.cell, EPSILON)
+    && stress.isApprox(st.stress, EPSILON)
+    && natoms()==st.natoms()
+    && (std::fabs(eweight-st.eweight) < EPSILON)
+    && (std::fabs(fweight-st.fweight) < EPSILON)
+    && (std::fabs(sweight-st.sweight) < EPSILON)
+    && (std::fabs(energy-st.energy) < EPSILON)
+    ;
+  if (!result) return result;
+  for (size_t i=0;i<natoms();++i) {
+    result = atoms[i]==st.atoms[i];
+    if (!result) return result;
+  }
+  return result;
+}
+bool Structure::is_the_same(const Structure& st, double thr) const
+{
+  bool result =
+    cell.isApprox(st.cell, thr)
+    && natoms()==st.natoms();
+  if (!result) return result;
+
+  size_t count=0;;
+  for (size_t i=0;i<natoms();++i) {
+    for (size_t j=i;j<natoms();++j) {
+      result = atoms[i].is_the_same(st.atoms[j], thr);
+      if (result) count++;
+    }
+  }
+
+  return count==natoms() ? true : false;
+}
+int Structure::next_structure(std::ifstream &ifs) {
+  std::string line;
+  std::getline(ifs,line);
+  if(line.empty()) return 0;
+
+  std::getline(ifs,line);
+  // the second line could be energy or
+  // a scalling factors eweight fweight sweight
+  std::stringstream stream(line);
+  std::string temp;
+  size_t count = 0;
+  while(stream >> temp) { ++count;}
+  // optional if second line is a weight
+  if (count==3)
+    std::getline(ifs,line);
+
+  for (size_t i=0; i<6;++i)
+    std::getline(ifs,line);
+
+  int natoms=0;
+  while(std::getline(ifs,line)) {
+    if(line.empty()) break;
+    if(line == "\r") break;     // detects windows newline
+    natoms++;
+  }
+  return natoms;
+}
+void Structure::clear_nn() {
+  near_neigh_shift.clear();
+  near_neigh_idx.clear();
+}
+
+std::vector<Atom>::iterator Structure::begin() { 
+    return atoms.begin(); 
+}
+
+std::vector<Atom>::iterator Structure::end() { 
+    return atoms.end(); 
+}
+
+std::vector<Atom>::const_iterator Structure::begin() const { 
+    return atoms.cbegin(); 
+}
+
+std::vector<Atom>::const_iterator Structure::end() const { 
+    return atoms.cend(); 
+}
+void Structure::dump_to_file(std::ostream& file, size_t prec) const {
+  const int n = 5;
+  file << label << std::endl;
+  file << std::fixed << std::setprecision(prec);
+  file << eweight << " " << fweight << " " << sweight << std::endl;
+  file << energy << " " << T << std::endl;
+
+  file
+    << std::setw(prec+n) << cell(0,0) << " "
+    << std::setw(prec+n) << cell(0,1) << " "
+    << std::setw(prec+n) << cell(0,2) << " " << std::endl
+    << std::setw(prec+n) << cell(1,0) << " "
+    << std::setw(prec+n) << cell(1,1) << " "
+    << std::setw(prec+n) << cell(1,2) << " " << std::endl
+    << std::setw(prec+n) << cell(2,0) << " "
+    << std::setw(prec+n) << cell(2,1) << " "
+    << std::setw(prec+n) << cell(2,2) << " " << std::endl;
+
+  file 
+    << std::setw(prec+n) << stress(0,0) << " "
+    << std::setw(prec+n) << stress(0,1) << " "
+    << std::setw(prec+n) << stress(0,2) << " " << std::endl
+    << std::setw(prec+n) << stress(1,0) << " "
+    << std::setw(prec+n) << stress(1,1) << " "
+    << std::setw(prec+n) << stress(1,2) << " " << std::endl
+    << std::setw(prec+n) << stress(2,0) << " "
+    << std::setw(prec+n) << stress(2,1) << " "
+    << std::setw(prec+n) << stress(2,2) << " " << std::endl;
+
+  for (const auto& a : atoms) {
+    file << std::setw(2) << a.symbol << " "
+      << std::setw(prec+n) << a.position[0] << " "
+      << std::setw(prec+n) << a.position[1] << " "
+      << std::setw(prec+n) << a.position[2] << " "
+      << std::setw(prec+n) << a.force[0] << " "
+      << std::setw(prec+n) << a.force[1] << " "
+      << std::setw(prec+n) << a.force[2] << std::endl;
+  }
+  file << std::endl;
+}
+void Structure::dump_to_file(const std::string& filepath, size_t prec) const {
+  std::ofstream file(filepath, std::ios::app);  // Open in append mode
+  if (!file.is_open()) {
+    std::cerr << "Error: Could not open file for writing: " << filepath << std::endl;
+    return;
+  }
+  dump_to_file(file,prec);
+  file.close();
+}
+std::set<Element> Structure::get_unique_elements() const{
+  std::set<Element> unique_elements;
+  for (const auto& a:atoms) unique_elements.insert(a);
+  return unique_elements;
+}
diff --git a/src/structure_db.cpp b/src/structure_db.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f1aaa36f17971a77b3c18ab8a31e0653aab409fb
--- /dev/null
+++ b/src/structure_db.cpp
@@ -0,0 +1,356 @@
+#include "tadah/core/element.h"
+#include <tadah/mlip/structure_db.h>
+#include <tadah/core/periodic_table.h>
+#include <cstdio>
+#include <cctype>   // For std::isspace
+
+StructureDB::StructureDB() {
+  PeriodicTable::initialize();
+}
+StructureDB::StructureDB(Config &config) {
+  PeriodicTable::initialize();
+  add(config);
+}
+
+void StructureDB::add(const std::string fn) {
+  std::ifstream ifs(fn);
+  if (!ifs.is_open()) {
+    throw std::runtime_error("DBFILE does not exist: "+fn);
+  }
+  parseFile(fn);
+  while (true) {
+    structures.push_back(Structure());
+    int t = structures.back().read(ifs);
+
+    // did we read structure succesfully?
+    // if not remove last object from the list
+    if (t==1) structures.pop_back();
+
+    if (ifs.eof()) break;
+  }
+  ifs.close();
+}
+int StructureDB::add(const std::string fn, size_t first, int N) {
+  std::ifstream ifs(fn);
+  if (!ifs.is_open()) {
+    throw std::runtime_error("DBFILE does not exist: "+fn);
+  }
+
+  // move iterator to the first structure we want to read
+  for (size_t i=0; i<first; ++i) {
+    Structure::next_structure(ifs);
+  }
+
+  int count=0;
+  while (count++<N) {
+    structures.push_back(Structure());
+    int t = structures.back().read(ifs);
+
+    // did we read structure succesfully?
+    // if not remove last object from the list
+    if (t==1) structures.pop_back();
+
+    if (ifs.eof()) break;
+  }
+  ifs.close();
+
+  return --count;
+}
+
+void StructureDB::add(const Structure &s) {
+  structures.push_back(s);
+}
+void StructureDB::add(const StructureDB &stdb) {
+  for (const auto &s: stdb) add(s);
+}
+
+void StructureDB::remove(size_t i) {
+  structures.erase(structures.begin()+i);
+}
+
+void StructureDB::add(Config &config) {
+  for (const std::string &s : config("DBFILE")) {
+    dbidx.push_back(size());
+    add(s);
+  }
+  dbidx.push_back(size());
+}
+
+size_t StructureDB::size() const {
+  return structures.size();
+}
+
+size_t StructureDB::size(size_t n) const {
+  return dbidx[n+1]-dbidx[n];
+}
+
+Structure &StructureDB::operator()(size_t s) {
+  return structures[s];
+}
+
+const Structure &StructureDB::operator()(size_t s) const {
+  return structures[s];
+}
+
+Atom &StructureDB::operator()(size_t s, size_t a) {
+  return structures[s].atoms[a];
+}
+size_t StructureDB::calc_natoms() const {
+  size_t natoms=0;
+  for (auto struc: structures) natoms += struc.natoms();
+  return natoms;
+}
+size_t StructureDB::calc_natoms(size_t n) const {
+  size_t start = dbidx[n];
+  size_t stop = dbidx[n+1];
+  size_t natoms=0;
+  for (size_t i=start; i<stop; ++i) {
+    natoms += (*this)(i).natoms();
+  }
+  return natoms;
+}
+std::set<Element> StructureDB::get_unique_elements() const {
+  std::set<Element> s;
+  for (const auto & st: structures) {
+    std::set<Element> u = st.get_unique_elements();
+    s.insert(u.cbegin(),u.cend());
+  }
+  return s;
+}
+std::set<Element> StructureDB::find_unique_elements(const std::string &fn) {
+  std::set<Element> s;
+  std::ifstream ifs(fn);
+  std::string line;
+
+  if (!ifs.is_open()) {
+    std::cerr << "Could not open the file." << std::endl;
+  }
+
+  char symbol[3];
+  while (std::getline(ifs, line)) {
+    // the second line could be energy or
+    // a scalling factors eweight fweight sweight
+    std::getline(ifs,line);  
+    std::stringstream stream(line);
+    size_t count = std::distance(std::istream_iterator<std::string>(stream),
+        std::istream_iterator<std::string>());
+
+    if (count == 3)
+      std::getline(ifs,line);
+
+    for (size_t i=0; i<6; ++i)
+      std::getline(ifs,line);
+
+    while (std::getline(ifs, line)) {
+      if(line.empty()) break;
+      sscanf(line.c_str(), "%2s", symbol);
+      s.insert(PeriodicTable::find_by_symbol(symbol));
+    }
+
+  }
+  return s;
+}
+std::set<Element> StructureDB::find_unique_elements(const Config &c) {
+  std::set<Element> s;
+  for (const auto& fn: c("DBFILE")) {
+    std::set<Element> temp = find_unique_elements(fn);
+    s.insert(temp.begin(), temp.end());
+  }
+  return s;
+}
+
+template <typename T,typename U>                                                   
+std::pair<T,U> operator+(const std::pair<T,U>  &l,const std::pair<T,U>  &r) {   
+  return {l.first+r.first,l.second+r.second};                                    
+}  
+std::pair<int,int> StructureDB::count(const Config &config) {
+  std::pair<int, int> res=std::make_pair(0,0);  
+  for (const std::string &fn : config("DBFILE")) {
+    res = res + count(fn);
+  }
+  return res;
+}
+
+std::pair<int,int> StructureDB::count(const std::string fn){
+  int nstruc=0;
+  int natoms_tot=0;
+
+  std::ifstream ifs(fn);
+  if (!ifs.is_open()) {
+    throw std::runtime_error("DBFILE does not exist: "+fn);
+  }
+  int natoms = Structure::next_structure(ifs);
+  while(natoms) {
+    natoms_tot += natoms;
+    natoms = Structure::next_structure(ifs);
+    nstruc++;
+  }
+
+  std::pair<int,int> res = std::make_pair(nstruc,natoms_tot);
+  return res;
+}
+void StructureDB::clear_nn() {
+  for (auto &struc: structures) struc.clear_nn();
+}
+void StructureDB::check_atoms_key(Config &config, std::set<Element> &unique_elements) {
+  bool error=false;
+
+  if (config.exist("ATOMS")) {
+    // user set this key so here we check does it correspond to unique_elements
+    if (unique_elements.size()!=config.size("ATOMS")) {
+      error=true;
+    }
+
+    auto set_it = unique_elements.begin();
+    auto atoms_it = config("ATOMS").begin();
+    while (set_it != unique_elements.end() && atoms_it != config("ATOMS").end()) {
+      if (set_it->symbol != (*atoms_it))  {
+        error=true;
+      }
+      ++set_it;
+      ++atoms_it;
+    }
+    if (error) {
+      throw std::runtime_error("\n"
+          "Mismatch between elements in datasets and ATOMS in the config file.\n"
+          "Please either update the ATOMS in the config file or remove ATOMS\n"
+          "key completely. Tadah! will automatically configure this key.\n"
+          );
+    }
+  } else {
+    for (const auto &s : unique_elements) config.add("ATOMS", s.symbol);
+  }
+}
+void StructureDB::check_watoms_key(Config &config, std::set<Element> &unique_elements) {
+  bool error=false;
+
+  if (config.exist("WATOMS")) {
+    // user set this key so here we check does it correspond to unique_elements
+    if (unique_elements.size()!=config.size("WATOMS")) {
+      error=true;
+    }
+    if (error) {
+      throw std::runtime_error("\n"
+          "Mismatch between elements in datasets and WATOMS in the config file.\n"
+          "Please either update the WATOMS in the config file or remove WATOMS\n"
+          "key completely. In the latter case Tadah! will use default values.\n"
+          );
+    }
+  } else {
+    for (const auto &s : unique_elements) config.add("WATOMS", s.Z);
+  }
+}
+
+std::vector<Structure>::iterator StructureDB::begin() { 
+    return structures.begin(); 
+}
+
+std::vector<Structure>::iterator StructureDB::end() { 
+    return structures.end(); 
+}
+
+std::vector<Structure>::const_iterator StructureDB::begin() const { 
+    return structures.cbegin(); 
+}
+
+std::vector<Structure>::const_iterator StructureDB::end() const { 
+    return structures.cend(); 
+}
+void StructureDB::dump_to_file(const std::string& filepath, size_t prec) const {
+  std::ofstream file(filepath, std::ios::app);  // Open in append mode
+  if (!file.is_open()) {
+    std::cerr << "Error: Could not open file for writing: " << filepath << std::endl;
+    return;
+  }
+  for (const auto &s : structures) {
+    s.dump_to_file(file,prec);
+  }
+  file.close();
+}
+
+std::string StructureDB::summary() const {
+  std::string str =  "# of structures : " + to_string(structures.size());
+
+  str +=  " | # of atoms : " + to_string(calc_natoms());
+
+  str += " | Elements : ";
+  std::set<Element> ue = get_unique_elements(); 
+  for (const auto &e: ue) str+= e.symbol + " ";
+  str+="\n";
+  return str;
+}
+void StructureDB::parseFile(const std::string& filename)
+{
+    std::ifstream fin(filename, std::ios::in | std::ios::binary);
+    if (!fin.is_open())
+    {
+        std::cerr << "Error: could not open file " << filename << "\n";
+        return;
+    }
+
+  std::size_t header_size = 9;
+
+    // Increase buffer size to speed up I/O on large files.
+    static const size_t BUFSIZE = 100ULL << 20; // 100 MiB
+    char* buffer = new char[BUFSIZE];
+    fin.rdbuf()->pubsetbuf(buffer, BUFSIZE);
+
+    std::vector<size_t> blockLineCounts;
+    blockLineCounts.reserve(10000); // Pre-allocate to reduce repeated allocations
+
+    size_t currentBlockCount = 0;
+    std::string line;
+
+    while (true)
+    {
+        if (!std::getline(fin, line))
+        {
+            // End of file or read error
+            break;
+        }
+
+        if (isBlankLine(line))
+        {
+            // We reached the end of the current block
+            if (currentBlockCount > 0)
+            {
+                blockLineCounts.push_back(currentBlockCount-header_size);
+                currentBlockCount = 0;
+            }
+        }
+        else
+        {
+            // Non-empty line => belongs to the current block
+            currentBlockCount++;
+        }
+    }
+
+    // If the last block didn’t end with a blank line, close it out
+    if (currentBlockCount > 0)
+    {
+        blockLineCounts.push_back(currentBlockCount-header_size);
+    }
+
+    fin.close();
+    delete[] buffer;
+
+    // Print the results
+    std::cout << "Found " << blockLineCounts.size() << " blocks.\n";
+    for (size_t i = 0; i < blockLineCounts.size(); i+=1000)
+    {
+        std::cout << "Block " << i << " has "
+                  << blockLineCounts[i] << " atoms\n";
+    }
+}
+
+bool StructureDB::isBlankLine(const std::string& line) const
+{
+    for (char c : line)
+    {
+        if (!std::isspace(static_cast<unsigned char>(c)))
+        {
+            return false;
+        }
+    }
+    return true;
+}