diff --git a/include/tadah/models/ea.h b/include/tadah/models/ea.h index 3ea1175df2d8d3f36c38d7d2ca4940050c339e89..2f6987d866080f957f59c4c4177a92c79881e7ce 100644 --- a/include/tadah/models/ea.h +++ b/include/tadah/models/ea.h @@ -4,6 +4,7 @@ #include <tadah/core/config.h> #include <tadah/models/svd.h> #include <tadah/models/ridge_regression.h> +#include <tadah/models/ols.h> #include <tadah/core/core_types.h> #include <tadah/core/lapack.h> @@ -11,10 +12,10 @@ class EA { private: const Config &config; const SVD &svd; - const t_type &T; + t_type &T; int verbose; public: - EA(const Config &c, const SVD &svd, const t_type &T): + EA(const Config &c, const SVD &svd, t_type &T): config(c), svd(svd), T(T), @@ -51,11 +52,16 @@ class EA { double *t = new double[svd.shapeA().first]; double *x = new double[svd.shapeA().second]; t_type m_n((size_t)svd.sizeS()); + OLS::Workspace ws; + auto shapeA = svd.shapeA(); + int m = shapeA.first; + int n = shapeA.second; double rcond = config.size("OALGO")==2 ? config.get<double>("OALGO",1) : 1e-8; while (test1>EPS2 || test2>EPS2) { // regularised least square problem (Tikhonov Regularization): - RidgeRegression::solve(svd,T,m_n,lambda,rcond); + /*RidgeRegression::solve(svd,T,m_n,lambda,rcond);*/ + OLS::solve_with_svd_with_workspace(m, n, T, m_n, svd, ws, rcond, lambda); double gamma = 0.0; for (size_t j=0; j<svd.sizeS(); j++) { diff --git a/include/tadah/models/linear_regressor.h b/include/tadah/models/linear_regressor.h index 92c47ea35cc06a7c6b46e3e1caed9f1a83887e28..4e7355fb66fcd165102a4f57cafb63abf2eb432d 100644 --- a/include/tadah/models/linear_regressor.h +++ b/include/tadah/models/linear_regressor.h @@ -65,7 +65,6 @@ public: } else if (oalgo==3) { SVD svd = SVD(Phi);; - // OLS::Workspace ws; if (lambda < 0) { double alpha = config.get<double>("ALPHA"); double beta = config.get<double>("BETA"); @@ -78,22 +77,11 @@ public: config.add("BETA", beta); } - Sigma = get_sigma(svd, lambda); - //std::cout << "Sigma: " << Sigma << std::endl; config_add_sigma(config, Sigma); if (verbose) std::cout << std::endl << "REG LAMBDA: " << lambda << std::endl; - // ws.U = svd.getU(); - // ws.S = svd.getS(); - // ws.VT = svd.getVT(); - - // std::cout << "own_U: " << ws.owns_U << std::endl; - //OLS::solve_with_workspace(Phi, T, weights, ws, rcond, OLS::Algorithm::SVD); - // OLS::solve(Phi, T, weights, rcond, OLS::Algorithm::SVD); - RidgeRegression::solve(svd, T, weights, lambda, rcond); - //Sigma = get_sigma(svd, lambda); - //std::cout << "Sigma: " << Sigma << std::endl; + OLS::solve_with_svd(Phi.rows(), Phi.cols(), T, weights, svd, rcond, lambda); } else { throw std::runtime_error("Unsupported OALGO: " + std::to_string(oalgo)); diff --git a/include/tadah/models/ols.h b/include/tadah/models/ols.h index e6c11f6650606439928f52d6c9816e4baa179aaa..3c4ec409f24cb6d13ff5d9e8677291dba3ffbda7 100644 --- a/include/tadah/models/ols.h +++ b/include/tadah/models/ols.h @@ -5,14 +5,15 @@ #include <tadah/core/core_types.h> #include <cmath> #include <stdexcept> -#include <cstdlib> // For malloc and free -#include <algorithm> // For std::max +#include <cstdlib> // For malloc and free +#include <algorithm> // For std::max +#include <iostream> // For std::ostream /** * @class OLS * @brief Provides functionality for solving Ordinary Least Squares (OLS) problems. * - * Utilizes various algorithms to solve linear systems where the goal is to minimize + * Utilizes various algorithms to solve linear systems where the goal is to minimize * the Euclidean norm of residuals. Supports the following algorithms: * * **Algorithms:** @@ -37,247 +38,389 @@ * * // Solve using Cholesky algorithm * OLS::solve_with_workspace(A, B, weights, ws, -1.0, OLS::Algorithm::Cholesky); + * + * // Solve using precomputed SVD and regularization + * SVD svd(A); + * double lambda = 0.1; + * OLS::solve_with_svd(A, B, weights, svd, lambda); * ``` */ class OLS { public: - /** + /** * @brief Enum to select the algorithm used for solving the OLS problem. */ - enum class Algorithm { - GELS, // Uses DGELS - GELSD, // Uses DGELSD - SVD, // Uses custom SVD implementation - Cholesky // Uses Cholesky decomposition - }; - - /** + enum class Algorithm { + GELS, // Uses DGELS + GELSD, // Uses DGELSD + SVD, // Uses custom SVD implementation + Cholesky // Uses Cholesky decomposition + }; + + // Overload the << operator for the Algorithm enum class + friend std::ostream& operator<<(std::ostream& os, Algorithm algo) { + switch (algo) { + case Algorithm::GELS: + os << "GELS"; + break; + case Algorithm::GELSD: + os << "GELSD"; + break; + case Algorithm::SVD: + os << "SVD"; + break; + case Algorithm::Cholesky: + os << "Cholesky"; + break; + default: + os << "Unknown Algorithm"; + break; + } + return os; + } + + /** * @brief Workspace struct to hold pre-allocated memory. * * This struct contains the necessary buffers for the computations in different algorithms. * Users can create an instance of `Workspace` and pass it to the `solve_with_workspace` method. */ - struct Workspace { - // Common workspace variables - double* work; // Double workspace array - int lwork; // Size of the work array - int m; // Number of rows in A (for which workspace was allocated) - int n; // Number of columns in A (for which workspace was allocated) - Algorithm algo; // Algorithm for which workspace was allocated - - // For DGELSD (GELSD algorithm) - double* s; // Singular values array - int* iwork; // Integer workspace array - - // For SVD algorithm - double* U; // U matrix from SVD (m x n) - double* S; // Singular values array (size min(m,n)) - double* VT; // V^T matrix from SVD (n x n) - int* iwork_svd; // Integer workspace array for SVD - - // Ownership flags for U, S, VT - bool owns_U; - bool owns_S; - bool owns_VT; - - // For Cholesky algorithm - double* AtA; // A^T * A matrix (n x n) - double* Atb; // A^T * b vector (n) - - // Constructors and methods (declarations only) - Workspace(); - ~Workspace(); - - void allocate(int m_, int n_, Algorithm algorithm = Algorithm::GELSD); - void release(); - bool is_sufficient(int m_, int n_, Algorithm algorithm = Algorithm::GELSD) const; - }; - - // Public methods - template <typename M, typename V> - static void solve(M& A, V& B, V& weights, double rcond, Algorithm algorithm); - - template <typename M, typename V> - static void solve_with_workspace(M& A, V& B, V& weights, Workspace& ws, double rcond, Algorithm algorithm); - - template <typename M, typename V> - static void solve(M& A, V& B, V& weights); - - template <typename M, typename V> - static void solve_with_workspace(M& A, V& B, V& weights, Workspace& ws); + struct Workspace { + // Common workspace variables + double* work; // Double workspace array + int lwork; // Size of the work array + int m; // Number of rows in A (for which workspace was allocated) + int n; // Number of columns in A (for which workspace was allocated) + Algorithm algo; // Algorithm for which workspace was allocated + + // For DGELSD (GELSD algorithm) + double* s; // Singular values array + int* iwork; // Integer workspace array + + // For SVD algorithm + double lambda; + double* U; // U matrix from SVD (m x min(m,n)) + double* S; // Singular values array (size min(m,n)) + double* VT; // V^T matrix from SVD (min(m,n) x n) + int* iwork_svd; // Integer workspace array for SVD + double* U_t_b; // Store result of U^T * b + + // Ownership flags for U, S, VT + bool owns_U; + bool owns_S; + bool owns_VT; + + // For Cholesky algorithm + double* AtA; // A^T * A matrix (n x n) + double* Atb; // A^T * b vector (n) + + // Constructors and methods (declarations only) + Workspace(); + ~Workspace(); + + void allocate(int m_, int n_, Algorithm algorithm = Algorithm::GELSD); + void release(); + bool is_sufficient(int m_, int n_, Algorithm algorithm = Algorithm::GELSD) const; + }; + + // Public methods + + // Original solve methods + template <typename M, typename V> + static void solve(M& A, V& B, V& weights); + + template <typename M, typename V> + static void solve(M& A, V& B, V& weights, double rcond, Algorithm algorithm); + + template <typename M, typename V> + static void solve_with_workspace(M& A, V& B, V& weights, Workspace& ws); + + template <typename M, typename V> + static void solve_with_workspace(M& A, V& B, V& weights, Workspace& ws, double rcond, Algorithm algorithm); + + // New methods: solve_with_svd + + // Solve using precomputed SVD (without workspace) + template <typename V1 ,typename V2, typename SVDType> + static void solve_with_svd(int m_, int n_, V1& B, V2& weights, SVDType& svd); + + template <typename V1 ,typename V2, typename SVDType> + static void solve_with_svd(int m_, int n_, V1& B, V2& weights, SVDType& svd, double rcond, double lambda); + + // Solve using precomputed SVD with workspace + template <typename V1 ,typename V2, typename SVDType> + static void solve_with_svd_with_workspace(int m_, int n_, V1& B, V2& weights, SVDType& svd, Workspace& ws); + + template <typename V1 ,typename V2, typename SVDType> + static void solve_with_svd_with_workspace(int m_, int n_, V1& B, V2& weights, SVDType& svd, Workspace& ws, double rcond, double lambda); }; -// Implement template methods here -// Since they are templates, definitions must be in the header +// Implement template methods here (definitions must be in the header) + +// Original solve methods + +template <typename M, typename V> +void OLS::solve(M& A, V& B, V& weights) { + double rcond = -1.0; + Algorithm algorithm = Algorithm::GELSD; + solve(A, B, weights, rcond, algorithm); +} template <typename M, typename V> void OLS::solve(M& A, V& B, V& weights, double rcond, Algorithm algorithm) { - Workspace ws; - solve_with_workspace(A, B, weights, ws, rcond, algorithm); + Workspace ws; + solve_with_workspace(A, B, weights, ws, rcond, algorithm); } template <typename M, typename V> -void OLS::solve_with_workspace(M& A, V& B, V& weights, Workspace& ws, double rcond, Algorithm algorithm) { - int m = A.rows(); - int n = A.cols(); - - // Check for valid algorithm - if (algorithm != Algorithm::GELSD && algorithm != Algorithm::GELS && - algorithm != Algorithm::SVD && algorithm != Algorithm::Cholesky) { - throw std::invalid_argument("Invalid algorithm specified."); - } - - // Check that dimensions of weights match expected size - if (weights.size() != static_cast<std::size_t>(n)) { - throw std::invalid_argument("weights size is incorrect."); - } - - // Check that B has sufficient size - int maxmn = std::max(m, n); - if (B.size() < static_cast<std::size_t>(maxmn)) { - throw std::invalid_argument("B size is insufficient."); - } - - // Check if workspace is sufficient; if not, reallocate - if (!ws.is_sufficient(m, n, algorithm)) { - ws.allocate(m, n, algorithm); - } - - int info; - if (algorithm == Algorithm::GELSD) { - int nrhs = 1; - double* a = A.ptr(); - int lda = m; - double* b = B.ptr(); - int ldb = maxmn; - int rank; +void OLS::solve_with_workspace(M& A, V& B, V& weights, Workspace& ws) { + double rcond = -1.0; + Algorithm algorithm = Algorithm::GELSD; + solve_with_workspace(A, B, weights, ws, rcond, algorithm); +} - // Solve using DGELSD - ::dgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, ws.s, &rcond, &rank, - ws.work, &ws.lwork, ws.iwork, &info); +// The main 'solve_with_workspace' template method +template <typename M, typename V> +void OLS::solve_with_workspace(M& A, V& B, V& weights, Workspace& ws, double rcond, Algorithm algorithm) { + int m = A.rows(); + int n = A.cols(); - if (info != 0) { - throw std::runtime_error("Error in DGELSD: info = " + std::to_string(info)); + // Check for valid algorithm + if (algorithm != Algorithm::GELSD && algorithm != Algorithm::GELS && + algorithm != Algorithm::SVD && algorithm != Algorithm::Cholesky) { + throw std::invalid_argument("Invalid algorithm specified."); } - // Copy solution to weights - for (int i = 0; i < n; ++i) { - weights[i] = b[i]; + // Check that dimensions of weights match expected size + if (weights.size() != static_cast<std::size_t>(n)) { + throw std::invalid_argument("weights size is incorrect."); } - } else if (algorithm == Algorithm::GELS) { - int nrhs = 1; - double* a = A.ptr(); - int lda = m; - double* b = B.ptr(); - int ldb = m; - char trans = 'N'; - // Solve using DGELS - ::dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, - ws.work, &ws.lwork, &info); + // Check that B has sufficient size + int maxmn = std::max(m, n); + if (B.size() < static_cast<std::size_t>(maxmn)) { + throw std::invalid_argument("B size is insufficient."); + } - if (info != 0) { - throw std::runtime_error("Error in DGELS: info = " + std::to_string(info)); + // Check if workspace is sufficient; if not, reallocate + if (!ws.is_sufficient(m, n, algorithm)) { + ws.allocate(m, n, algorithm); } - // Copy solution to weights - for (int i = 0; i < n; ++i) { - weights[i] = b[i]; + int info; + if (algorithm == Algorithm::GELSD) { + int nrhs = 1; + double* a = A.ptr(); + int lda = m; + double* b = B.ptr(); + int ldb = maxmn; + int rank; + + // Solve using DGELSD + ::dgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, ws.s, &rcond, &rank, + ws.work, &ws.lwork, ws.iwork, &info); + + if (info != 0) { + throw std::runtime_error("Error in DGELSD: info = " + std::to_string(info)); + } + + // Copy solution to weights + for (int i = 0; i < n; ++i) { + weights[i] = b[i]; + } + } else if (algorithm == Algorithm::GELS) { + int nrhs = 1; + double* a = A.ptr(); + int lda = m; + double* b = B.ptr(); + int ldb = m; + char trans = 'N'; + + // Solve using DGELS + ::dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, + ws.work, &ws.lwork, &info); + + if (info != 0) { + throw std::runtime_error("Error in DGELS: info = " + std::to_string(info)); + } + + // Copy solution to weights + for (int i = 0; i < n; ++i) { + weights[i] = b[i]; + } + } else if (algorithm == Algorithm::SVD) { + + // Custom SVD-based solution + int min_mn = std::min(m, n); + + if (!ws.U || !ws.S || !ws.VT) { + // Perform SVD + + // Ensure workspace is allocated + if (!ws.is_sufficient(m, n, algorithm)) { + ws.allocate(m, n, algorithm); + } + + char jobz = 'S'; // Compute the first min(m,n) singular values and vectors + int lda = m; + int ldu = m; + int ldvt = min_mn; // VT is min(m,n) x n + + // Now perform SVD + ::dgesdd_(&jobz, &m, &n, A.ptr(), &lda, ws.S, ws.U, &ldu, ws.VT, &ldvt, ws.work, &ws.lwork, ws.iwork_svd, &info); + + if (info != 0) { + throw std::runtime_error("Error in DGESDD: info = " + std::to_string(info)); + } + + ws.owns_U = true; + ws.owns_S = true; + ws.owns_VT = true; + } + + if (ws.U_t_b == nullptr) { + ws.U_t_b = new double[min_mn]; + } + + // Compute Uáµ— * b + char trans = 'T'; // Transpose U + double alpha = 1.0; + double beta = 0.0; + int incx = 1; + int incy = 1; + int lda_u = m; + ::dgemv_(&trans, &m, &min_mn, &alpha, ws.U, &lda_u, B.ptr(), &incx, &beta, ws.U_t_b, &incy); + + // Find the maximum singular value + double smax = 0.0; + for (int i = 0; i < min_mn; ++i) { + if (ws.S[i] > smax) { + smax = ws.S[i]; + } + } + + // Threshold for singular values + double thresh = rcond > 0.0 ? rcond * smax : 0.0; + + // Apply regularization using smax and lambda from workspace + for (int i = 0; i < min_mn; ++i) { + if (ws.S[i] > thresh) { + ws.U_t_b[i] = ws.U_t_b[i] * ws.S[i] / (ws.S[i] * ws.S[i] + ws.lambda); + } else { + ws.U_t_b[i] = 0.0; + } + } + + // Compute x = V * (regularized solution) + trans = 'T'; // Transpose VT to get V + int lda_vt = min_mn; + ::dgemv_(&trans, &min_mn, &n, &alpha, ws.VT, &lda_vt, ws.U_t_b, &incx, &beta, weights.ptr(), &incy); + + } else if (algorithm == Algorithm::Cholesky) { + // Cholesky-based solution + throw std::runtime_error("Cholesky algorithm is not implemented in this version."); } - } else if (algorithm == Algorithm::SVD) { +} - // Custom SVD-based solution - int min_mn = std::min(m, n); +// New methods: solve_with_svd - // Check if U, S, VT are provided - bool have_svd_matrices = (ws.U != nullptr) && (ws.S != nullptr) && (ws.VT != nullptr); +// Overload without lambda, defaults to lambda = 0.0 +template <typename V1 ,typename V2, typename SVDType> +void OLS::solve_with_svd(int m_, int n_, V1& B, V2& weights, SVDType& svd) { + double rcond = -1.0; + double lambda = 0.0; + Workspace ws; + solve_with_svd_with_workspace(m_, n_, B, weights, svd, ws, rcond, lambda); +} - if (!have_svd_matrices) { - // Perform SVD +// Overload with rcond and lambda +template <typename V1 ,typename V2, typename SVDType> +void OLS::solve_with_svd(int m_, int n_, V1& B, V2& weights, SVDType& svd, double rcond, double lambda) { + Workspace ws; + solve_with_svd_with_workspace(m_, n_, B, weights, svd, ws, rcond, lambda); +} - // Ensure workspace is allocated - if (!ws.is_sufficient(m, n, algorithm)) { - ws.allocate(m, n, algorithm); - } +// Overload with workspace and default lambda = 0.0 +template <typename V1 ,typename V2, typename SVDType> +void OLS::solve_with_svd_with_workspace(int m_, int n_, V1& B, V2& weights, SVDType& svd, Workspace& ws) { + double rcond = -1.0; + double lambda = 0.0; + solve_with_svd_with_workspace(m_, n_, B, weights, svd, ws, rcond, lambda); +} + +// Main method with workspace, rcond, and lambda +template <typename V1 ,typename V2, typename SVDType> +void OLS::solve_with_svd_with_workspace(int m_, int n_, V1& B, V2& weights, SVDType& svd, Workspace& ws, double rcond, double lambda) { + int m = m_; + int n = n_; - char jobz = 'S'; // Compute the first min(m,n) singular values and vectors - int lda = m; - int ldu = m; - int ldvt = min_mn; + int min_mn = std::min(m, n); - // Now perform SVD - ::dgesdd_(&jobz, &m, &n, A.ptr(), &lda, ws.S, ws.U, &ldu, ws.VT, &ldvt, ws.work, &ws.lwork, ws.iwork_svd, &info); + // Check that dimensions of weights match expected size + if (weights.size() != static_cast<std::size_t>(n)) { + throw std::invalid_argument("weights size is incorrect."); + } + // Check that B has sufficient size + int maxmn = std::max(m, n); + if (B.size() < static_cast<std::size_t>(maxmn)) { + throw std::invalid_argument("B size is insufficient."); + } - if (info != 0) { - throw std::runtime_error("Error in DGESDD: info = " + std::to_string(info)); - } + // Check if workspace is sufficient; if not, reallocate + if (!ws.is_sufficient(m, n, Algorithm::SVD)) { + ws.allocate(m, n, Algorithm::SVD); } - // Now compute x = V * Sâ»Â¹ * Uáµ— * b + // Copy precomputed SVD components into the workspace + ws.U = svd.getU(); // Assume getU() returns a pointer to U (m x min(m,n)) + ws.S = svd.getS(); // Assume getS() returns a pointer to singular values (min(m,n)) + ws.VT = svd.getVT(); // Assume getVT() returns a pointer to VT (min(m,n) x n) + ws.lambda = lambda; + + // Indicate that we don't own the SVD component pointers + ws.owns_U = false; + ws.owns_S = false; + ws.owns_VT = false; - // Compute Uáµ— * b - std::vector<double> U_t_b(min_mn, 0.0); + // Proceed with the computations + if (ws.U_t_b == nullptr) { + ws.U_t_b = new double[min_mn]; + } + + // Compute Uáµ— * B char trans = 'T'; // Transpose U double alpha = 1.0; double beta = 0.0; int incx = 1; int incy = 1; - int m_u = m; - int n_u = min_mn; - ::dgemv_(&trans, &m_u, &n_u, &alpha, ws.U, &m_u, B.ptr(), &incx, &beta, U_t_b.data(), &incy); + int lda = m; + ::dgemv_(&trans, &m, &min_mn, &alpha, ws.U, &lda, B.ptr(), &incx, &beta, ws.U_t_b, &incy); // Find the maximum singular value double smax = 0.0; for (int i = 0; i < min_mn; ++i) { - if (ws.S[i] > smax) { - smax = ws.S[i]; - } + if (ws.S[i] > smax) { + smax = ws.S[i]; + } } // Threshold for singular values double thresh = rcond > 0.0 ? rcond * smax : 0.0; + // Apply regularization using 'smax' approach and lambda for (int i = 0; i < min_mn; ++i) { - if (ws.S[i] > thresh) { - U_t_b[i] /= ws.S[i]; - //U_t_b[i] /= ws.S[i] / (ws.S[i] * ws.S[i] + ws.lambda); - } else { - // Singular value is too small; set result to zero - U_t_b[i] = 0.0; - } + if (ws.S[i] > thresh) { + ws.U_t_b[i] = ws.U_t_b[i] * ws.S[i] / (ws.S[i] * ws.S[i] + ws.lambda); + } else { + ws.U_t_b[i] = 0.0; + } } - - // Compute x = V * (Uáµ— * b / S) + // Compute x = V * (regularized solution) trans = 'T'; // Transpose VT to get V - int m_vt = min_mn; - int n_vt = n; - ::dgemv_(&trans, &m_vt, &n_vt, &alpha, ws.VT, &m_vt, U_t_b.data(), &incx, &beta, weights.ptr(), &incy); - - - } else if (algorithm == Algorithm::Cholesky) { - // Cholesky-based solution - // Implementation can be provided or left as an exercise - throw std::runtime_error("Cholesky algorithm is not implemented in this version."); - } -} - -template <typename M, typename V> -void OLS::solve(M& A, V& B, V& weights) { - // Default rcond and algorithm - double rcond = -1.0; - Algorithm algorithm = Algorithm::GELSD; - solve(A, B, weights, rcond, algorithm); -} - -template <typename M, typename V> -void OLS::solve_with_workspace(M& A, V& B, V& weights, Workspace& ws) { - // Default rcond and algorithm - double rcond = -1.0; - Algorithm algorithm = Algorithm::GELSD; - solve_with_workspace(A, B, weights, ws, rcond, algorithm); + int lda_vt = min_mn; + ::dgemv_(&trans, &min_mn, &n, &alpha, ws.VT, &lda_vt, ws.U_t_b, &incx, &beta, weights.ptr(), &incy); } #endif // OLS_H - diff --git a/src/ols.cpp b/src/ols.cpp index cd2eced12464f122cb6f6442eb415b5eef3cbbaf..6de4092eaa721bd169b24b9f49f75bc3e9e82956 100644 --- a/src/ols.cpp +++ b/src/ols.cpp @@ -1,157 +1,163 @@ #include <tadah/models/ols.h> #include <cmath> #include <stdexcept> -#include <cstdlib> // For malloc and free -#include <algorithm> // For std::max +#include <cstdlib> // For malloc and free +#include <algorithm> // For std::max // Implementation of non-template methods // Constructor OLS::Workspace::Workspace() - : work(nullptr), lwork(-1), m(0), n(0), algo(Algorithm::GELSD), - s(nullptr), iwork(nullptr), - U(nullptr), S(nullptr), VT(nullptr), iwork_svd(nullptr), - owns_U(false), owns_S(false), owns_VT(false), - AtA(nullptr), Atb(nullptr) { + : work(nullptr), lwork(-1), m(0), n(0), algo(Algorithm::GELSD), + s(nullptr), iwork(nullptr), + lambda(0.0), U(nullptr), S(nullptr), VT(nullptr), iwork_svd(nullptr), U_t_b(nullptr), + owns_U(false), owns_S(false), owns_VT(false), + AtA(nullptr), Atb(nullptr) { } // Destructor OLS::Workspace::~Workspace() { - release(); + release(); } // allocate method void OLS::Workspace::allocate(int m_, int n_, Algorithm algorithm) { - release(); // Release any existing memory - - m = m_; - n = n_; - algo = algorithm; - - int info; - - if (algorithm == Algorithm::GELSD) { - // Allocate workspace for DGELSD - int minmn = std::min(m, n); - int maxmn = std::max(m, n); - int nlvl = std::max(0, static_cast<int>(std::log2(minmn / 25.0)) + 1); - - // Allocate s (singular values) - s = new double[minmn]; - - // Allocate iwork - int liwork = 3 * minmn * nlvl + 11 * minmn; - iwork = new int[liwork]; - - // Workspace query - double work_query; - int lwork_query = -1; - int nrhs = 1; - double* a_dummy = nullptr; - double* b_dummy = nullptr; - ::dgelsd_(&m, &n, &nrhs, a_dummy, &m, b_dummy, &maxmn, s, nullptr, - nullptr, &work_query, &lwork_query, iwork, &info); - lwork = static_cast<int>(work_query); - work = new double[lwork]; - } else if (algorithm == Algorithm::GELS) { - // Allocate workspace for DGELS - double work_query; - int lwork_query = -1; - int nrhs = 1; - double* a_dummy = nullptr; - double* b_dummy = nullptr; - char trans = 'N'; - ::dgels_(&trans, &m, &n, &nrhs, a_dummy, &m, b_dummy, &m, &work_query, &lwork_query, &info); - lwork = static_cast<int>(work_query); - work = new double[lwork]; - } else if (algorithm == Algorithm::SVD) { - - // Allocate U, S, VT, and iwork_svd - int min_mn = std::min(m, n); - - U = new double[m * min_mn]; // U is m x min(m,n) - S = new double[min_mn]; // Singular values - VT = new double[min_mn * n]; // VT is min(m,n) x n - iwork_svd = new int[8 * min_mn]; // Workspace for dgesdd_ - - owns_U = true; - owns_S = true; - owns_VT = true; - - // Query for optimal workspace size - char jobz = 'S'; - int lda = m; - int ldu = m; - int ldvt = min_mn; - lwork = -1; - double wkopt; - int info_query; - double* a_dummy = nullptr; - ::dgesdd_(&jobz, &m, &n, a_dummy, &lda, S, U, &ldu, VT, &ldvt, &wkopt, &lwork, iwork_svd, &info_query); - - if (info_query != 0) { - throw std::runtime_error("Error in DGESDD (workspace query): info = " + std::to_string(info_query)); + release(); // Release any existing memory + + m = m_; + n = n_; + algo = algorithm; + lambda = 0.0; // Reset lambda + + int info; + + if (algorithm == Algorithm::GELSD) { + // Allocate workspace for DGELSD + int minmn = std::min(m, n); + int maxmn = std::max(m, n); + int nlvl = std::max(0, static_cast<int>(std::log2(minmn / 25.0)) + 1); + + // Allocate s (singular values) + s = new double[minmn]; + + // Allocate iwork + int liwork = 3 * minmn * nlvl + 11 * minmn; + iwork = new int[liwork]; + + // Workspace query + double work_query; + int lwork_query = -1; + int nrhs = 1; + double* a_dummy = nullptr; + double* b_dummy = nullptr; + ::dgelsd_(&m, &n, &nrhs, a_dummy, &m, b_dummy, &maxmn, s, nullptr, + nullptr, &work_query, &lwork_query, iwork, &info); + lwork = static_cast<int>(work_query); + work = new double[lwork]; + } else if (algorithm == Algorithm::GELS) { + // Allocate workspace for DGELS + double work_query; + int lwork_query = -1; + int nrhs = 1; + double* a_dummy = nullptr; + double* b_dummy = nullptr; + char trans = 'N'; + ::dgels_(&trans, &m, &n, &nrhs, a_dummy, &m, b_dummy, &m, &work_query, &lwork_query, &info); + lwork = static_cast<int>(work_query); + work = new double[lwork]; + } else if (algorithm == Algorithm::SVD) { + + // Allocate U, S, VT, and iwork_svd + int min_mn = std::min(m, n); + + // Only allocate if we own the data + if (owns_U) { + U = new double[m * min_mn]; // U is m x min(m,n) + } + if (owns_S) { + S = new double[min_mn]; // Singular values + } + if (owns_VT) { + VT = new double[min_mn * n]; // VT is min(m,n) x n + } + iwork_svd = new int[8 * min_mn]; // Workspace for dgesdd_ + + // Query for optimal workspace size + char jobz = 'S'; + int lda = m; + int ldu = m; + int ldvt = min_mn; + lwork = -1; + double wkopt; + int info_query; + double* a_dummy = nullptr; + ::dgesdd_(&jobz, &m, &n, a_dummy, &lda, S, U, &ldu, VT, &ldvt, &wkopt, &lwork, iwork_svd, &info_query); + + if (info_query != 0) { + throw std::runtime_error("Error in DGESDD (workspace query): info = " + std::to_string(info_query)); + } + + lwork = static_cast<int>(wkopt); + work = new double[lwork]; + + } else if (algorithm == Algorithm::Cholesky) { + // Allocate space for AtA (n x n) and Atb (n) + AtA = new double[n * n]; + Atb = new double[n]; + + // No additional workspace required for Cholesky decomposition + work = nullptr; + lwork = 0; + } else { + throw std::invalid_argument("Invalid algorithm specified in Workspace::allocate()."); } - - lwork = static_cast<int>(wkopt); - work = new double[lwork]; - - - } else if (algorithm == Algorithm::Cholesky) { - // Allocate space for AtA (n x n) and Atb (n) - AtA = new double[n * n]; - Atb = new double[n]; - - // No additional workspace required for Cholesky decomposition - work = nullptr; - lwork = 0; - } else { - throw std::invalid_argument("Invalid algorithm specified in Workspace::allocate()."); - } } // release method void OLS::Workspace::release() { - if (owns_U) { - delete[] U; - U = nullptr; + if (owns_U && U != nullptr) { + delete[] U; + U = nullptr; + } + if (owns_S && S != nullptr) { + delete[] S; + S = nullptr; + } + if (owns_VT && VT != nullptr) { + delete[] VT; + VT = nullptr; + } + delete[] s; + delete[] iwork; + delete[] U_t_b; + delete[] iwork_svd; + delete[] work; + delete[] AtA; + delete[] Atb; + + s = nullptr; + iwork = nullptr; + U_t_b = nullptr; + iwork_svd = nullptr; + work = nullptr; + AtA = nullptr; + Atb = nullptr; + + lwork = -1; + m = 0; + n = 0; + algo = Algorithm::GELSD; + lambda = 0.0; + owns_U = false; - } - if (owns_S) { - delete[] S; - S = nullptr; owns_S = false; - } - if (owns_VT) { - delete[] VT; - VT = nullptr; owns_VT = false; - } - delete[] s; - delete[] iwork; - delete[] iwork_svd; - delete[] work; - delete[] AtA; - delete[] Atb; - - s = nullptr; - iwork = nullptr; - iwork_svd = nullptr; - work = nullptr; - AtA = nullptr; - Atb = nullptr; - - lwork = -1; - m = 0; - n = 0; - algo = Algorithm::GELSD; } // is_sufficient method bool OLS::Workspace::is_sufficient(int m_, int n_, Algorithm algorithm) const { - if (m != m_ || n != n_ || algo != algorithm) { - return false; - } - // Additional checks can be added here if necessary - return true; + if (m != m_ || n != n_ || algo != algorithm) { + return false; + } + return true; }