From 382a22e19e6bc40915994a8e470ed49015bbdc70 Mon Sep 17 00:00:00 2001
From: Marcin Kirsz <mkirsz@ed.ac.uk>
Date: Mon, 15 Apr 2024 14:16:33 +0100
Subject: [PATCH] Counting structures

---
 structure.cpp    | 296 ++++++++++++++++++++++++-----------------------
 structure.h      |   5 +
 structure_db.cpp |  21 ++++
 structure_db.h   |   3 +
 4 files changed, 183 insertions(+), 142 deletions(-)

diff --git a/structure.cpp b/structure.cpp
index 38aca9c..4f83d0e 100644
--- a/structure.cpp
+++ b/structure.cpp
@@ -6,188 +6,200 @@
 Structure::Structure() {}
 
 const Atom &Structure::operator()(const size_t i) const{
-    return atoms[i];
+  return atoms[i];
 }
 
 void Structure::add_atom(Atom a) {
-    atoms.push_back(a);
+  atoms.push_back(a);
 }
 void Structure::remove_atom(const size_t i) {
-    atoms.erase(atoms.begin()+i);
+  atoms.erase(atoms.begin()+i);
 }
 
 const Vec3d& Structure::nn_pos(const size_t i, const size_t n) const {
-    return near_neigh_atoms[i][n].position;
+  return near_neigh_atoms[i][n].position;
 }
 
 size_t Structure::natoms() const {
-    return atoms.size();
+  return atoms.size();
 }
 
 size_t Structure::nn_size(size_t i) const {
-    return near_neigh_atoms[i].size();
+  return near_neigh_atoms[i].size();
 }
 
 int Structure::read(std::ifstream &ifs) {
-    std::string line;
-    // ignore extra empty lines
-    std::getline(ifs,line);
-    if(line.empty()) return 1;
-
-    // OK, process structural data
-    // read first line as a label
-    label = line;
-
-    // the second line could be energy or
-    // a scalling factors eweight fweight sweight
-    std::getline(ifs,line);
-    std::stringstream stream(line);
-    std::string temp;
-    size_t count = 0;
-    while(stream >> temp) { ++count;}
-
-    if (count == 3) {
-        stream.clear();
-        stream.seekg(0, std::ios::beg);
-        stream >> eweight >> fweight >> sweight;
-        // energy
-        ifs >> energy;
-    }
-    else {
-        energy = std::stod(line);
-    }
-
-    // 3 lines, 9 elements are for the cell matrix
-    for (int i=0; i<3; ++i)
-        for (int j=0; j<3; ++j)
-            ifs >> cell(i,j);
-
-    // 3 lines, 9 elements are for the stress matrix
-    // if (use_stress)
-    for (int i=0; i<3; ++i)
-        for (int j=0; j<3; ++j)
-            ifs >> stress(i,j);
-
-    // move to next line
-    std::getline(ifs,line);
-
-    // make sure atoms vector is empty
-    atoms.clear();
-
-    char symbol[3];
-    double px,py,pz,fx,fy,fz;
-    PeriodicTable pd;
-    while(std::getline(ifs,line)) {
-        if(line.empty()) break;
-        //if(line == " ") break;
-        if(line == "\r") break;     // detects windows newline
-
-        std::istringstream tmp(line);
-        tmp >> symbol >> px >> py >> pz >> fx >> fy >> fz;
-        Element e = pd.find_by_symbol(symbol);
-        unique_elements.insert(e);
-        atoms.push_back(Atom(e,px,py,pz,fx,fy,fz));
-        //Atom &a = atoms.back();
-
-        //tmp >> a.label >> a.position(0) >> a.position(1) >> a.position(2);
-        // if (use_force)
-        //tmp >> a.force(0) >> a.force(1) >> a.force(2);
-        //std::cout <<  a.force(0) << " " <<  a.force(1) << " " <<  a.force(2) << std::endl;;
-    }
-    return 0;
+  std::string line;
+  // ignore extra empty lines
+  std::getline(ifs,line);
+  if(line.empty()) return 1;
+
+  // OK, process structural data
+  // read first line as a label
+  label = line;
+
+  // the second line could be energy or
+  // a scalling factors eweight fweight sweight
+  std::getline(ifs,line);
+  std::stringstream stream(line);
+  std::string temp;
+  size_t count = 0;
+  while(stream >> temp) { ++count;}
+
+  if (count == 3) {
+    stream.clear();
+    stream.seekg(0, std::ios::beg);
+    stream >> eweight >> fweight >> sweight;
+    // energy
+    ifs >> energy;
+  }
+  else {
+    energy = std::stod(line);
+  }
+
+  // 3 lines, 9 elements are for the cell matrix
+  for (int i=0; i<3; ++i)
+    for (int j=0; j<3; ++j)
+      ifs >> cell(i,j);
+
+  // 3 lines, 9 elements are for the stress matrix
+  // if (use_stress)
+  for (int i=0; i<3; ++i)
+    for (int j=0; j<3; ++j)
+      ifs >> stress(i,j);
+
+  // move to next line
+  std::getline(ifs,line);
+
+  // make sure atoms vector is empty
+  atoms.clear();
+
+  char symbol[3];
+  double px,py,pz,fx,fy,fz;
+  PeriodicTable pd;
+  while(std::getline(ifs,line)) {
+    if(line.empty()) break;
+    //if(line == " ") break;
+    if(line == "\r") break;     // detects windows newline
+
+    std::istringstream tmp(line);
+    tmp >> symbol >> px >> py >> pz >> fx >> fy >> fz;
+    Element e = pd.find_by_symbol(symbol);
+    unique_elements.insert(e);
+    atoms.push_back(Atom(e,px,py,pz,fx,fy,fz));
+    //Atom &a = atoms.back();
+
+    //tmp >> a.label >> a.position(0) >> a.position(1) >> a.position(2);
+    // if (use_force)
+    //tmp >> a.force(0) >> a.force(1) >> a.force(2);
+    //std::cout <<  a.force(0) << " " <<  a.force(1) << " " <<  a.force(2) << std::endl;;
+  }
+  return 0;
 }
 
 void Structure::read(const std::string fn) {
-    std::ifstream ifs(fn);
-    if (!ifs.is_open()) {
-        throw std::runtime_error("File does not exist: "+fn);
-    }
-    read(ifs);
-    ifs.close();
+  std::ifstream ifs(fn);
+  if (!ifs.is_open()) {
+    throw std::runtime_error("File does not exist: "+fn);
+  }
+  read(ifs);
+  ifs.close();
 }
 
 void Structure::save(std::ofstream &ofs) const {
-    ofs << label << std::endl;
-    ofs << eweight << " " << fweight << " " << sweight << std::endl;
-    ofs << energy << std::endl;
-    for (int i=0; i<3; ++i) {
-        ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,0);
-        ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,1);
-        ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,2);
-        ofs << std::endl;
-    }
-
-    for (int i=0; i<3; ++i) {
-        ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,0);
-        ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,1);
-        ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,2);
-        ofs << std::endl;
-    }
-
-    for (const Atom &a:atoms) {
-        ofs << a.symbol[0] << a.symbol[1];
-        ofs << std::setw(w-2) << std::fixed << std::right
-            << std::setprecision(p) << a.position(0);
-        ofs << std::setw(w) << std::fixed << std::right
-            << std::setprecision(p) << a.position(1);
-        ofs << std::setw(w) << std::fixed << std::right
-            << std::setprecision(p) << a.position(2);
-        ofs << std::setw(w) << std::fixed << std::right
-            << std::setprecision(p) << a.force(0);
-        ofs << std::setw(w) << std::fixed << std::right
-            << std::setprecision(p) << a.force(1);
-        ofs << std::setw(w) << std::fixed << std::right
-            << std::setprecision(p) << a.force(1);
-        ofs << std::endl;
-    }
+  ofs << label << std::endl;
+  ofs << eweight << " " << fweight << " " << sweight << std::endl;
+  ofs << energy << std::endl;
+  for (int i=0; i<3; ++i) {
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,0);
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,1);
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,2);
+    ofs << std::endl;
+  }
+
+  for (int i=0; i<3; ++i) {
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,0);
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,1);
+    ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,2);
+    ofs << std::endl;
+  }
+
+  for (const Atom &a:atoms) {
+    ofs << a.symbol[0] << a.symbol[1];
+    ofs << std::setw(w-2) << std::fixed << std::right
+      << std::setprecision(p) << a.position(0);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.position(1);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.position(2);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.force(0);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.force(1);
+    ofs << std::setw(w) << std::fixed << std::right
+      << std::setprecision(p) << a.force(1);
+    ofs << std::endl;
+  }
 }
 
 void Structure::save(const std::string fn) const {
-    std::ofstream ofs(fn);
-    save(ofs);
-    ofs.close();
+  std::ofstream ofs(fn);
+  save(ofs);
+  ofs.close();
 }
 size_t Structure::get_nn_iindex(const size_t i, const size_t j, const size_t jj) const {
-    Vec3d shift_ijj = -near_neigh_shift[i][jj];
-    size_t ii = 0;
+  Vec3d shift_ijj = -near_neigh_shift[i][jj];
+  size_t ii = 0;
 
-    for (size_t x=0; x<near_neigh_idx[j].size(); ++x) {
-        if (near_neigh_idx[j][x] == i)
-            if (near_neigh_shift[j][ii] == shift_ijj)
-                break;
-        ii++;
-    }
-    return ii;
+  for (size_t x=0; x<near_neigh_idx[j].size(); ++x) {
+    if (near_neigh_idx[j][x] == i)
+      if (near_neigh_shift[j][ii] == shift_ijj)
+        break;
+    ii++;
+  }
+  return ii;
 
 }
 double Structure::get_volume() const {
-    return cell.row(0)*(cell.row(1).cross(cell.row(2)));
+  return cell.row(0)*(cell.row(1).cross(cell.row(2)));
 }
 
 double Structure::get_virial_pressure() const {
-    return stress.trace()/get_volume()/3;
+  return stress.trace()/get_volume()/3;
 }
 
 double Structure::get_pressure(double T, double kB) const {
-    double vpress = get_virial_pressure();
-    return vpress + natoms()*kB*T/get_volume();
+  double vpress = get_virial_pressure();
+  return vpress + natoms()*kB*T/get_volume();
 }
 bool Structure::operator==(const Structure& st) const
 {
-    double EPSILON = std::numeric_limits<double>::epsilon();
-    bool result =
-        cell.isApprox(st.cell, EPSILON)
-        && stress.isApprox(st.stress, EPSILON)
-        && natoms()==st.natoms()
-        && (std::fabs(eweight-st.eweight) < EPSILON)
-        && (std::fabs(fweight-st.fweight) < EPSILON)
-        && (std::fabs(sweight-st.sweight) < EPSILON)
-        && (std::fabs(energy-st.energy) < EPSILON)
-        ;
+  double EPSILON = std::numeric_limits<double>::epsilon();
+  bool result =
+    cell.isApprox(st.cell, EPSILON)
+    && stress.isApprox(st.stress, EPSILON)
+    && natoms()==st.natoms()
+    && (std::fabs(eweight-st.eweight) < EPSILON)
+    && (std::fabs(fweight-st.fweight) < EPSILON)
+    && (std::fabs(sweight-st.sweight) < EPSILON)
+    && (std::fabs(energy-st.energy) < EPSILON)
+    ;
+  if (!result) return result;
+  for (size_t i=0;i<natoms();++i) {
+    result = atoms[i]==st.atoms[i];
     if (!result) return result;
-    for (size_t i=0;i<natoms();++i) {
-        result = atoms[i]==st.atoms[i];
-            if (!result) return result;
-    }
-    return result;
+  }
+  return result;
+}
+int Structure::next_structure(std::ifstream &ifs) {
+  std::string line;
+  std::getline(ifs,line);
+  if(line.empty()) return 1;
+
+  while(std::getline(ifs,line)) {
+    if(line.empty()) break;
+    if(line == "\r") break;     // detects windows newline
+
+  }
+  return 0;
 }
diff --git a/structure.h b/structure.h
index f051a37..42fd402 100644
--- a/structure.h
+++ b/structure.h
@@ -256,6 +256,11 @@ struct Structure {
      */
     bool operator==(const Structure& st) const;
 
+    // move iterator forward to the next structure
+    // return 0 if success
+    // return 1 if there is no more structures
+    static int next_structure(std::ifstream &ifs) const;
+
     private:
     const static size_t w=15;  // controls output width
     const static size_t p=6;   // controls output precision
diff --git a/structure_db.cpp b/structure_db.cpp
index 0a70a97..0e74ee8 100644
--- a/structure_db.cpp
+++ b/structure_db.cpp
@@ -78,3 +78,24 @@ std::unordered_set<Element> StructureDB::get_unique_elements() {
             st.unique_elements.begin(),st.unique_elements.end());
     return s;
 }
+int StructureDB::count_structures(const Config &c) {
+  int count=0;
+  for (const std::string &s : config("DBFILE")) {
+    count += count_structures(fn)''
+  }
+  return count;
+}
+int StructureDB::count_structures(const std::string fn) const {
+  int count=0;
+
+  std::ifstream ifs(fn);
+  if (!ifs.is_open()) {
+    throw std::runtime_error("DBFILE does not exist: "+fn);
+  }
+
+  while(Structure::next_structure(ifs)) {
+    count++;
+  }
+
+  return count;
+}
diff --git a/structure_db.h b/structure_db.h
index fcb2794..00790aa 100644
--- a/structure_db.h
+++ b/structure_db.h
@@ -123,5 +123,8 @@ struct StructureDB {
 
     /** Return unique elements for all Structures */
     std::unordered_set<Element> get_unique_elements();
+
+    static int count_structures(const Config &c) const;
+    static int count_structures(const std::string fn) const;
 };
 #endif
-- 
GitLab