diff --git a/include/tadah/mlip/structure_db.h b/include/tadah/mlip/structure_db.h index a0bc3443cb14b5e467ed64e5a77f4016c6ce1be5..07d2c23b99af4e4c84cb274bbfb9567a1e88aae2 100644 --- a/include/tadah/mlip/structure_db.h +++ b/include/tadah/mlip/structure_db.h @@ -154,4 +154,3 @@ private: } // namespace tadah #endif // TADAH_MLIP_STRUCTURE_DB_H - diff --git a/src/structure.cpp b/src/structure.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ee3ca19c640ae715b5e7d92070e5cebadf3e023d --- /dev/null +++ b/src/structure.cpp @@ -0,0 +1,354 @@ +#include <tadah/mlip/structure.h> +#include <tadah/core/periodic_table.h> + +#include <stdexcept> +#include <cmath> + +Structure::Structure() { + PeriodicTable::initialize(); +} + +const Atom &Structure::operator()(const size_t i) const{ + return atoms[i]; +} + +void Structure::add_atom(Atom a) { + atoms.push_back(a); +} +void Structure::remove_atom(const size_t i) { + atoms.erase(atoms.begin()+i); +} + +Vec3d Structure::nn_pos(const size_t i, const size_t n) const +{ + // (A) Global index of this neighbor + const size_t neighborIndex = near_neigh_idx[i][n]; + + // (B) Atom's original "unshifted" position: + const Vec3d &posNeighbor = atoms[neighborIndex].position; + + // (C) Convert the stored shift -> real displacement shiftDisp + // If near_neigh_shift[i][n] is an integer triple (n1, n2, n3), + // multiply by the cell. Otherwise, if it's already in real space, + // you can just do: Vec3d shiftDisp = near_neigh_shift[i][n]. + Vec3d shift = near_neigh_shift[i][n]; // might be (n1, n2, n3) + + // If shift is integer triple (n1, n2, n3), multiply with cell: + Vec3d shiftDisp; + shiftDisp[0] = shift[0]*cell(0,0) + shift[1]*cell(0,1) + shift[2]*cell(0,2); + shiftDisp[1] = shift[0]*cell(1,0) + shift[1]*cell(1,1) + shift[2]*cell(1,2); + shiftDisp[2] = shift[0]*cell(2,0) + shift[1]*cell(2,1) + shift[2]*cell(2,2); + + // (D) Final neighbor position = unshifted position + shiftDisp + Vec3d posShifted = posNeighbor + shiftDisp; + return posShifted; +} + +size_t Structure::natoms() const { + return atoms.size(); +} + +size_t Structure::nn_size(size_t i) const { + return near_neigh_idx[i].size(); +} + +int Structure::read(std::ifstream &ifs) { + std::string line; + // ignore extra empty lines + std::getline(ifs,line); + if(line.empty()) return 1; + + // OK, process structural data + // read first line as a label + label = line; + + // the second line could be energy or + // a scalling factors eweight fweight sweight + std::getline(ifs,line); + std::stringstream stream(line); + std::string temp; + size_t count = 0; + while(stream >> temp) { ++count;} + + if (count == 3) { + stream.clear(); + stream.seekg(0, std::ios::beg); + stream >> eweight >> fweight >> sweight; + std::getline(ifs,line); + std::stringstream stream2(line); + // energy and T + stream2 >> energy; + if(stream2) stream2 >> T; + } + else if (count == 2) { + stream.clear(); + stream.seekg(0, std::ios::beg); + // energy and T + stream >> energy >> T; + } + else { + energy = std::stod(line); + } + + // 3 lines, 9 elements are for the cell matrix + for (int i=0; i<3; ++i) + for (int j=0; j<3; ++j) + ifs >> cell(i,j); + + // 3 lines, 9 elements are for the stress matrix + // if (use_stress) + for (int i=0; i<3; ++i) + for (int j=0; j<3; ++j) + ifs >> stress(i,j); + + // move to next line + std::getline(ifs,line); + + // make sure atoms vector is empty + atoms.clear(); + + char symbol[3]; + double px,py,pz,fx,fy,fz; + while(std::getline(ifs,line)) { + if(line.empty()) break; + //if(line == " ") break; + if(line == "\r") break; // detects windows newline + + std::istringstream tmp(line); + tmp >> symbol >> px >> py >> pz >> fx >> fy >> fz; + Element e = PeriodicTable::find_by_symbol(symbol); + atoms.push_back(Atom(e,px,py,pz,fx,fy,fz)); + //Atom &a = atoms.back(); + + //tmp >> a.label >> a.position(0) >> a.position(1) >> a.position(2); + // if (use_force) + //tmp >> a.force(0) >> a.force(1) >> a.force(2); + //std::cout << a.force(0) << " " << a.force(1) << " " << a.force(2) << std::endl;; + } + return 0; +} + +void Structure::read(const std::string fn) { + std::ifstream ifs(fn); + if (!ifs.is_open()) { + throw std::runtime_error("File does not exist: "+fn); + } + read(ifs); + ifs.close(); +} + +void Structure::save(std::ofstream &ofs) const { + ofs << label << std::endl; + ofs << eweight << " " << fweight << " " << sweight << std::endl; + ofs << energy << std::endl; + for (int i=0; i<3; ++i) { + ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,0); + ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,1); + ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << cell(i,2); + ofs << std::endl; + } + + for (int i=0; i<3; ++i) { + ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,0); + ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,1); + ofs << std::fixed << std::right << std::setw(w) << std::setprecision(p) << stress(i,2); + ofs << std::endl; + } + + for (const Atom &a:atoms) { + ofs << a.symbol[0] << a.symbol[1]; + ofs << std::setw(w-2) << std::fixed << std::right + << std::setprecision(p) << a.position(0); + ofs << std::setw(w) << std::fixed << std::right + << std::setprecision(p) << a.position(1); + ofs << std::setw(w) << std::fixed << std::right + << std::setprecision(p) << a.position(2); + ofs << std::setw(w) << std::fixed << std::right + << std::setprecision(p) << a.force(0); + ofs << std::setw(w) << std::fixed << std::right + << std::setprecision(p) << a.force(1); + ofs << std::setw(w) << std::fixed << std::right + << std::setprecision(p) << a.force(1); + ofs << std::endl; + } +} + +void Structure::save(const std::string fn) const { + std::ofstream ofs(fn); + save(ofs); + ofs.close(); +} +size_t Structure::get_nn_iindex(const size_t i, const size_t j, const size_t jj) const { + Vec3d shift_ijj = -near_neigh_shift[i][jj]; + size_t ii = 0; + + for (size_t x=0; x<near_neigh_idx[j].size(); ++x) { + if (near_neigh_idx[j][x] == i) + if (near_neigh_shift[j][ii] == shift_ijj) + break; + ii++; + } + return ii; + +} +double Structure::get_volume() const { + return cell.row(0)*(cell.row(1).cross(cell.row(2))); +} +double Structure::get_density() const { + double V = get_volume(); + V*=1e-24; // convert to cm^3 + double amu = 1.66053906660e-24; // g + double mass = 0; + for (const auto& a:atoms) mass += PeriodicTable::get_mass(a.Z); + return amu*mass/V; +} + +double Structure::get_temperature() const { + return T; +} + +double Structure::get_virial_pressure() const { + return stress.trace()/get_volume()/3; +} + +double Structure::get_pressure(double T, double kB) const { + double vpress = get_virial_pressure(); + return vpress + natoms()*kB*T/get_volume(); +} +bool Structure::operator==(const Structure& st) const +{ + double EPSILON = std::numeric_limits<double>::epsilon(); + bool result = + cell.isApprox(st.cell, EPSILON) + && stress.isApprox(st.stress, EPSILON) + && natoms()==st.natoms() + && (std::fabs(eweight-st.eweight) < EPSILON) + && (std::fabs(fweight-st.fweight) < EPSILON) + && (std::fabs(sweight-st.sweight) < EPSILON) + && (std::fabs(energy-st.energy) < EPSILON) + ; + if (!result) return result; + for (size_t i=0;i<natoms();++i) { + result = atoms[i]==st.atoms[i]; + if (!result) return result; + } + return result; +} +bool Structure::is_the_same(const Structure& st, double thr) const +{ + bool result = + cell.isApprox(st.cell, thr) + && natoms()==st.natoms(); + if (!result) return result; + + size_t count=0;; + for (size_t i=0;i<natoms();++i) { + for (size_t j=i;j<natoms();++j) { + result = atoms[i].is_the_same(st.atoms[j], thr); + if (result) count++; + } + } + + return count==natoms() ? true : false; +} +int Structure::next_structure(std::ifstream &ifs) { + std::string line; + std::getline(ifs,line); + if(line.empty()) return 0; + + std::getline(ifs,line); + // the second line could be energy or + // a scalling factors eweight fweight sweight + std::stringstream stream(line); + std::string temp; + size_t count = 0; + while(stream >> temp) { ++count;} + // optional if second line is a weight + if (count==3) + std::getline(ifs,line); + + for (size_t i=0; i<6;++i) + std::getline(ifs,line); + + int natoms=0; + while(std::getline(ifs,line)) { + if(line.empty()) break; + if(line == "\r") break; // detects windows newline + natoms++; + } + return natoms; +} +void Structure::clear_nn() { + near_neigh_shift.clear(); + near_neigh_idx.clear(); +} + +std::vector<Atom>::iterator Structure::begin() { + return atoms.begin(); +} + +std::vector<Atom>::iterator Structure::end() { + return atoms.end(); +} + +std::vector<Atom>::const_iterator Structure::begin() const { + return atoms.cbegin(); +} + +std::vector<Atom>::const_iterator Structure::end() const { + return atoms.cend(); +} +void Structure::dump_to_file(std::ostream& file, size_t prec) const { + const int n = 5; + file << label << std::endl; + file << std::fixed << std::setprecision(prec); + file << eweight << " " << fweight << " " << sweight << std::endl; + file << energy << " " << T << std::endl; + + file + << std::setw(prec+n) << cell(0,0) << " " + << std::setw(prec+n) << cell(0,1) << " " + << std::setw(prec+n) << cell(0,2) << " " << std::endl + << std::setw(prec+n) << cell(1,0) << " " + << std::setw(prec+n) << cell(1,1) << " " + << std::setw(prec+n) << cell(1,2) << " " << std::endl + << std::setw(prec+n) << cell(2,0) << " " + << std::setw(prec+n) << cell(2,1) << " " + << std::setw(prec+n) << cell(2,2) << " " << std::endl; + + file + << std::setw(prec+n) << stress(0,0) << " " + << std::setw(prec+n) << stress(0,1) << " " + << std::setw(prec+n) << stress(0,2) << " " << std::endl + << std::setw(prec+n) << stress(1,0) << " " + << std::setw(prec+n) << stress(1,1) << " " + << std::setw(prec+n) << stress(1,2) << " " << std::endl + << std::setw(prec+n) << stress(2,0) << " " + << std::setw(prec+n) << stress(2,1) << " " + << std::setw(prec+n) << stress(2,2) << " " << std::endl; + + for (const auto& a : atoms) { + file << std::setw(2) << a.symbol << " " + << std::setw(prec+n) << a.position[0] << " " + << std::setw(prec+n) << a.position[1] << " " + << std::setw(prec+n) << a.position[2] << " " + << std::setw(prec+n) << a.force[0] << " " + << std::setw(prec+n) << a.force[1] << " " + << std::setw(prec+n) << a.force[2] << std::endl; + } + file << std::endl; +} +void Structure::dump_to_file(const std::string& filepath, size_t prec) const { + std::ofstream file(filepath, std::ios::app); // Open in append mode + if (!file.is_open()) { + std::cerr << "Error: Could not open file for writing: " << filepath << std::endl; + return; + } + dump_to_file(file,prec); + file.close(); +} +std::set<Element> Structure::get_unique_elements() const{ + std::set<Element> unique_elements; + for (const auto& a:atoms) unique_elements.insert(a); + return unique_elements; +} diff --git a/src/structure_db.cpp b/src/structure_db.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f1aaa36f17971a77b3c18ab8a31e0653aab409fb --- /dev/null +++ b/src/structure_db.cpp @@ -0,0 +1,356 @@ +#include "tadah/core/element.h" +#include <tadah/mlip/structure_db.h> +#include <tadah/core/periodic_table.h> +#include <cstdio> +#include <cctype> // For std::isspace + +StructureDB::StructureDB() { + PeriodicTable::initialize(); +} +StructureDB::StructureDB(Config &config) { + PeriodicTable::initialize(); + add(config); +} + +void StructureDB::add(const std::string fn) { + std::ifstream ifs(fn); + if (!ifs.is_open()) { + throw std::runtime_error("DBFILE does not exist: "+fn); + } + parseFile(fn); + while (true) { + structures.push_back(Structure()); + int t = structures.back().read(ifs); + + // did we read structure succesfully? + // if not remove last object from the list + if (t==1) structures.pop_back(); + + if (ifs.eof()) break; + } + ifs.close(); +} +int StructureDB::add(const std::string fn, size_t first, int N) { + std::ifstream ifs(fn); + if (!ifs.is_open()) { + throw std::runtime_error("DBFILE does not exist: "+fn); + } + + // move iterator to the first structure we want to read + for (size_t i=0; i<first; ++i) { + Structure::next_structure(ifs); + } + + int count=0; + while (count++<N) { + structures.push_back(Structure()); + int t = structures.back().read(ifs); + + // did we read structure succesfully? + // if not remove last object from the list + if (t==1) structures.pop_back(); + + if (ifs.eof()) break; + } + ifs.close(); + + return --count; +} + +void StructureDB::add(const Structure &s) { + structures.push_back(s); +} +void StructureDB::add(const StructureDB &stdb) { + for (const auto &s: stdb) add(s); +} + +void StructureDB::remove(size_t i) { + structures.erase(structures.begin()+i); +} + +void StructureDB::add(Config &config) { + for (const std::string &s : config("DBFILE")) { + dbidx.push_back(size()); + add(s); + } + dbidx.push_back(size()); +} + +size_t StructureDB::size() const { + return structures.size(); +} + +size_t StructureDB::size(size_t n) const { + return dbidx[n+1]-dbidx[n]; +} + +Structure &StructureDB::operator()(size_t s) { + return structures[s]; +} + +const Structure &StructureDB::operator()(size_t s) const { + return structures[s]; +} + +Atom &StructureDB::operator()(size_t s, size_t a) { + return structures[s].atoms[a]; +} +size_t StructureDB::calc_natoms() const { + size_t natoms=0; + for (auto struc: structures) natoms += struc.natoms(); + return natoms; +} +size_t StructureDB::calc_natoms(size_t n) const { + size_t start = dbidx[n]; + size_t stop = dbidx[n+1]; + size_t natoms=0; + for (size_t i=start; i<stop; ++i) { + natoms += (*this)(i).natoms(); + } + return natoms; +} +std::set<Element> StructureDB::get_unique_elements() const { + std::set<Element> s; + for (const auto & st: structures) { + std::set<Element> u = st.get_unique_elements(); + s.insert(u.cbegin(),u.cend()); + } + return s; +} +std::set<Element> StructureDB::find_unique_elements(const std::string &fn) { + std::set<Element> s; + std::ifstream ifs(fn); + std::string line; + + if (!ifs.is_open()) { + std::cerr << "Could not open the file." << std::endl; + } + + char symbol[3]; + while (std::getline(ifs, line)) { + // the second line could be energy or + // a scalling factors eweight fweight sweight + std::getline(ifs,line); + std::stringstream stream(line); + size_t count = std::distance(std::istream_iterator<std::string>(stream), + std::istream_iterator<std::string>()); + + if (count == 3) + std::getline(ifs,line); + + for (size_t i=0; i<6; ++i) + std::getline(ifs,line); + + while (std::getline(ifs, line)) { + if(line.empty()) break; + sscanf(line.c_str(), "%2s", symbol); + s.insert(PeriodicTable::find_by_symbol(symbol)); + } + + } + return s; +} +std::set<Element> StructureDB::find_unique_elements(const Config &c) { + std::set<Element> s; + for (const auto& fn: c("DBFILE")) { + std::set<Element> temp = find_unique_elements(fn); + s.insert(temp.begin(), temp.end()); + } + return s; +} + +template <typename T,typename U> +std::pair<T,U> operator+(const std::pair<T,U> &l,const std::pair<T,U> &r) { + return {l.first+r.first,l.second+r.second}; +} +std::pair<int,int> StructureDB::count(const Config &config) { + std::pair<int, int> res=std::make_pair(0,0); + for (const std::string &fn : config("DBFILE")) { + res = res + count(fn); + } + return res; +} + +std::pair<int,int> StructureDB::count(const std::string fn){ + int nstruc=0; + int natoms_tot=0; + + std::ifstream ifs(fn); + if (!ifs.is_open()) { + throw std::runtime_error("DBFILE does not exist: "+fn); + } + int natoms = Structure::next_structure(ifs); + while(natoms) { + natoms_tot += natoms; + natoms = Structure::next_structure(ifs); + nstruc++; + } + + std::pair<int,int> res = std::make_pair(nstruc,natoms_tot); + return res; +} +void StructureDB::clear_nn() { + for (auto &struc: structures) struc.clear_nn(); +} +void StructureDB::check_atoms_key(Config &config, std::set<Element> &unique_elements) { + bool error=false; + + if (config.exist("ATOMS")) { + // user set this key so here we check does it correspond to unique_elements + if (unique_elements.size()!=config.size("ATOMS")) { + error=true; + } + + auto set_it = unique_elements.begin(); + auto atoms_it = config("ATOMS").begin(); + while (set_it != unique_elements.end() && atoms_it != config("ATOMS").end()) { + if (set_it->symbol != (*atoms_it)) { + error=true; + } + ++set_it; + ++atoms_it; + } + if (error) { + throw std::runtime_error("\n" + "Mismatch between elements in datasets and ATOMS in the config file.\n" + "Please either update the ATOMS in the config file or remove ATOMS\n" + "key completely. Tadah! will automatically configure this key.\n" + ); + } + } else { + for (const auto &s : unique_elements) config.add("ATOMS", s.symbol); + } +} +void StructureDB::check_watoms_key(Config &config, std::set<Element> &unique_elements) { + bool error=false; + + if (config.exist("WATOMS")) { + // user set this key so here we check does it correspond to unique_elements + if (unique_elements.size()!=config.size("WATOMS")) { + error=true; + } + if (error) { + throw std::runtime_error("\n" + "Mismatch between elements in datasets and WATOMS in the config file.\n" + "Please either update the WATOMS in the config file or remove WATOMS\n" + "key completely. In the latter case Tadah! will use default values.\n" + ); + } + } else { + for (const auto &s : unique_elements) config.add("WATOMS", s.Z); + } +} + +std::vector<Structure>::iterator StructureDB::begin() { + return structures.begin(); +} + +std::vector<Structure>::iterator StructureDB::end() { + return structures.end(); +} + +std::vector<Structure>::const_iterator StructureDB::begin() const { + return structures.cbegin(); +} + +std::vector<Structure>::const_iterator StructureDB::end() const { + return structures.cend(); +} +void StructureDB::dump_to_file(const std::string& filepath, size_t prec) const { + std::ofstream file(filepath, std::ios::app); // Open in append mode + if (!file.is_open()) { + std::cerr << "Error: Could not open file for writing: " << filepath << std::endl; + return; + } + for (const auto &s : structures) { + s.dump_to_file(file,prec); + } + file.close(); +} + +std::string StructureDB::summary() const { + std::string str = "# of structures : " + to_string(structures.size()); + + str += " | # of atoms : " + to_string(calc_natoms()); + + str += " | Elements : "; + std::set<Element> ue = get_unique_elements(); + for (const auto &e: ue) str+= e.symbol + " "; + str+="\n"; + return str; +} +void StructureDB::parseFile(const std::string& filename) +{ + std::ifstream fin(filename, std::ios::in | std::ios::binary); + if (!fin.is_open()) + { + std::cerr << "Error: could not open file " << filename << "\n"; + return; + } + + std::size_t header_size = 9; + + // Increase buffer size to speed up I/O on large files. + static const size_t BUFSIZE = 100ULL << 20; // 100 MiB + char* buffer = new char[BUFSIZE]; + fin.rdbuf()->pubsetbuf(buffer, BUFSIZE); + + std::vector<size_t> blockLineCounts; + blockLineCounts.reserve(10000); // Pre-allocate to reduce repeated allocations + + size_t currentBlockCount = 0; + std::string line; + + while (true) + { + if (!std::getline(fin, line)) + { + // End of file or read error + break; + } + + if (isBlankLine(line)) + { + // We reached the end of the current block + if (currentBlockCount > 0) + { + blockLineCounts.push_back(currentBlockCount-header_size); + currentBlockCount = 0; + } + } + else + { + // Non-empty line => belongs to the current block + currentBlockCount++; + } + } + + // If the last block didn’t end with a blank line, close it out + if (currentBlockCount > 0) + { + blockLineCounts.push_back(currentBlockCount-header_size); + } + + fin.close(); + delete[] buffer; + + // Print the results + std::cout << "Found " << blockLineCounts.size() << " blocks.\n"; + for (size_t i = 0; i < blockLineCounts.size(); i+=1000) + { + std::cout << "Block " << i << " has " + << blockLineCounts[i] << " atoms\n"; + } +} + +bool StructureDB::isBlankLine(const std::string& line) const +{ + for (char c : line) + { + if (!std::isspace(static_cast<unsigned char>(c))) + { + return false; + } + } + return true; +}