From e449d5672b452f406662e82f2ac9d8b822bbbaf4 Mon Sep 17 00:00:00 2001 From: mkirsz <s1351949@sms.ed.ac.uk> Date: Sun, 16 Feb 2025 17:11:15 +0000 Subject: [PATCH] Improved OLS --- include/tadah/models/linear_regressor.h | 2 +- include/tadah/models/ols.h | 273 +++++++++++++++++++----- src/ols.cpp | 211 ++++++++++++++++++ tests/test_ols.cpp | 140 ++++++++++-- 4 files changed, 558 insertions(+), 68 deletions(-) create mode 100644 src/ols.cpp diff --git a/include/tadah/models/linear_regressor.h b/include/tadah/models/linear_regressor.h index ce111bb..591bec1 100644 --- a/include/tadah/models/linear_regressor.h +++ b/include/tadah/models/linear_regressor.h @@ -38,7 +38,7 @@ class LinearRegressor { double rcond = config.size("LAMBDA")==2 ? config.get<double>("LAMBDA",1) : 1e-8; if (lambda == 0) { - OLS::solve(Phi, T, weights, rcond); + OLS::solve(Phi, T, weights, rcond, OLS::Algorithm::GELSD); } else { double alpha = config.get<double>("ALPHA"); double beta = config.get<double>("BETA"); diff --git a/include/tadah/models/ols.h b/include/tadah/models/ols.h index b95c774..ca94d21 100644 --- a/include/tadah/models/ols.h +++ b/include/tadah/models/ols.h @@ -1,74 +1,243 @@ #ifndef OLS_H #define OLS_H +#include <tadah/core/lapack.h> // Ensure this header includes the necessary LAPACK function declarations #include <tadah/core/core_types.h> -#include <tadah/core/lapack.h> +#include <cmath> +#include <stdexcept> +#include <cstdlib> // For malloc and free /** * @class OLS * @brief Provides functionality for solving Ordinary Least Squares (OLS) problems. * - * Utilizes LAPACK's DGELSD function to solve linear systems where the goal is to minimize - * the Euclidean norm of residuals. + * Utilizes LAPACK's DGELS and DGELSD functions to solve linear systems where the goal is to minimize + * the Euclidean norm of residuals. By default, it uses DGELSD, but users can select DGELS if desired. + * + * **Algorithm Selection:** + * - **GELSD (`DGELSD`):** Uses a singular value decomposition (SVD) approach, which is more robust and + * can handle rank-deficient or ill-conditioned systems. It can provide a minimum-norm solution and + * handle cases where the matrix `A` does not have full rank. `DGELSD` is recommended when precision + * is important, or when the system may be rank-deficient or ill-conditioned. + * - **GELS (`DGELS`):** Uses QR or LQ factorization, which is generally faster but less robust for + * ill-conditioned problems. It assumes that `A` has full rank. `DGELS` is suitable for well-conditioned + * systems where performance is critical and the matrix `A` is expected to have full rank. + * + * **Usage Recommendation:** + * - Use **GELSD** (`Algorithm::GELSD`) when dealing with potentially rank-deficient or ill-conditioned systems, + * or when you require the most accurate solution at the expense of computational time. + * - Use **GELS** (`Algorithm::GELS`) when working with well-conditioned systems where speed is more important + * than handling rank deficiency. + * + * **Workspace Management:** + * - The class provides methods to solve the OLS problem with and without providing a preallocated workspace. + * Preallocating the workspace can improve performance when solving multiple problems of the same size. + * - If the workspace is insufficient, the `solve_with_workspace` method will automatically reallocate it to + * the required size. + * + * **Example:** + * ```cpp + * Matrix A; // Initialize with your data + * Vector B; // Initialize with your data + * Vector weights(A.cols()); + * + * // Solve using default algorithm (GELSD) + * OLS::solve(A, B, weights); + * + * // Solve using GELS algorithm + * OLS::solve(A, B, weights, -1.0, OLS::Algorithm::GELS); + * ``` */ class OLS { - public: +public: + /** + * @brief Enum to select the algorithm used for solving the OLS problem. + */ + enum class Algorithm { + GELS, // Uses DGELS + GELSD // Uses DGELSD + }; + + /** + * @brief Workspace struct to hold pre-allocated memory. + * + * This struct contains the necessary buffers for the LAPACK functions. + * Users can create an instance of `Workspace` and pass it to the `solve_with_workspace` method. + */ + struct Workspace { + double* s; // Singular values array (used by DGELSD) + int* iwork; // Integer workspace array (used by DGELSD) + double* work; // Double workspace array + int lwork; // Size of the work array + int m; // Number of rows in A (for which workspace was allocated) + int n; // Number of columns in A (for which workspace was allocated) + Algorithm algo; // Algorithm for which workspace was allocated + + Workspace(); + ~Workspace(); + + /** + * @brief Allocates memory for the workspace based on problem dimensions and algorithm. + * + * @param m_ Number of rows in matrix A. + * @param n_ Number of columns in matrix A. + * @param algorithm The algorithm for which to allocate workspace. + */ + void allocate(int m_, int n_, Algorithm algorithm = Algorithm::GELSD); + + /** + * @brief Frees the allocated memory. + */ + void release(); + + /** + * @brief Checks if the workspace is sufficient for the given dimensions and algorithm. + * + * @param m_ Number of rows required. + * @param n_ Number of columns required. + * @param algorithm The algorithm required. + * @return True if workspace is sufficient, false otherwise. + */ + bool is_sufficient(int m_, int n_, Algorithm algorithm = Algorithm::GELSD) const; + }; + /** - * @brief Solves the OLS problem for the given matrix and vector. + * @brief Solves the OLS problem for the given matrix and vector without requiring external workspace. * - * This function computes the weights that minimize the difference between - * the target vector and the matrix product. + * @tparam M Type for the matrix A. + * @tparam V Type for the vectors B and weights. + * @param A Input matrix (may be modified during computation). + * @param B Input target vector; contains the solution upon return. + * @param weights Output vector containing the computed weights. + * @param rcond The reciprocal of the condition number threshold (used for DGELSD). Default is -1.0. + * @param algorithm Algorithm to use (GELSD by default). + */ + template <typename M, typename V> + static void solve(M& A, V& B, V& weights, double rcond, Algorithm algorithm); + + /** + * @brief Solves the OLS problem for the given matrix and vector using provided workspace. + * + * @tparam M Type for the matrix A. + * @tparam V Type for the vectors B and weights. + * @param A Input matrix (may be modified during computation). + * @param B Input target vector; contains the solution upon return. + * @param weights Output vector containing the computed weights. + * @param ws Reference to the Workspace instance. + * @param rcond The reciprocal of the condition number threshold (used for DGELSD). Default is -1.0. + * @param algorithm Algorithm to use (GELSD by default). + */ + template <typename M, typename V> + static void solve_with_workspace(M& A, V& B, V& weights, Workspace& ws, double rcond, Algorithm algorithm); + + /** + * @brief Solves the OLS problem using default settings (GELSD algorithm and default rcond). * * @tparam M Type for the matrix A. - * @tparam V Type for the vector B. - * @param A Input matrix, which will be destroyed upon exit. - * @param B Input target vector, resized if smaller than matrix columns; contains the solution upon return. + * @tparam V Type for the vectors B and weights. + * @param A Input matrix (may be modified during computation). + * @param B Input target vector; contains the solution upon return. * @param weights Output vector containing the computed weights. */ template <typename M, typename V> - static void solve(M &A, V &B, V &weights, double rcond) { - // Resize B if necessary to match A's column count. - if (B.size() < A.cols()) - B.resize(A.cols()); - - // Variables for LAPACK computation - int m = A.rows(); - int n = A.cols(); - int nrhs = 1; // Number of target columns - double *a = A.ptr(); - int lda = m; - double *b = B.ptr(); - int ldb = std::max(m, n); - double *s = new double[std::min(m, n)]; // Singular values + static void solve(M& A, V& B, V& weights); + + /** + * @brief Solves the OLS problem using default settings and provided workspace. + * + * @tparam M Type for the matrix A. + * @tparam V Type for the vectors B and weights. + * @param A Input matrix (may be modified during computation). + * @param B Input target vector; contains the solution upon return. + * @param weights Output vector containing the computed weights. + * @param ws Reference to the Workspace instance. + */ + template <typename M, typename V> + static void solve_with_workspace(M& A, V& B, V& weights, Workspace& ws); +}; + +// Implement templated methods here + +template <typename M, typename V> +void OLS::solve(M& A, V& B, V& weights, double rcond, Algorithm algorithm) +{ + Workspace ws; + solve_with_workspace(A, B, weights, ws, rcond, algorithm); +} + +template <typename M, typename V> +void OLS::solve_with_workspace(M& A, V& B, V& weights, Workspace& ws, double rcond, Algorithm algorithm) +{ + int m = A.rows(); + int n = A.cols(); + + // Check for valid algorithm + if (algorithm != Algorithm::GELSD && algorithm != Algorithm::GELS) { + throw std::invalid_argument("Invalid algorithm specified."); + } + + // Check that dimensions of weights match expected size + if (weights.size() != static_cast<std::size_t>(n)) { + throw std::invalid_argument("weights size is incorrect."); + } + + // Check that B has sufficient size + int maxmn = std::max(m, n); + if (B.size() < static_cast<std::size_t>(maxmn)) { + throw std::invalid_argument("B size is insufficient."); + } + + // Check if workspace is sufficient; if not, reallocate + if (!ws.is_sufficient(m, n, algorithm)) { + ws.allocate(m, n, algorithm); + } + + int nrhs = 1; + double* a = A.ptr(); + int lda = m; + double* b = B.ptr(); + int ldb = maxmn; + int info; + + if (algorithm == Algorithm::GELSD) { int rank; - double *work; - int lwork = -1; // Workspace query - int smlsiz = 25; - int nlvl = std::max(0, int(std::log2(std::min(m, n) / (smlsiz + 1))) + 1); - int *iwork = new int[3 * std::min(m, n) * nlvl + 11 * std::min(m, n)]; - int info; - double wkopt; - - // Query optimal workspace size - dgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, s, &rcond, &rank, - &wkopt, &lwork, iwork, &info); - - // Allocate workspace memory - lwork = (int)wkopt; - work = new double[lwork]; - - // Solve the OLS problem - dgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, s, &rcond, &rank, - work, &lwork, iwork, &info); - - // Copy solution to weights - weights = B.copy(A.cols()); - - // Cleanup memory - delete[] s; - delete[] work; - delete[] iwork; + + // Solve using DGELSD + ::dgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, ws.s, &rcond, &rank, + ws.work, &ws.lwork, ws.iwork, &info); + } else { // Algorithm::GELS + // Solve using DGELS + char trans = 'N'; + ::dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, + ws.work, &ws.lwork, &info); } -}; + + if (info != 0) { + throw std::runtime_error("Error in LAPACK function: info = " + std::to_string(info)); + } + + // Copy solution to weights + for (int i = 0; i < n; ++i) { + weights[i] = b[i]; + } +} + +template <typename M, typename V> +void OLS::solve(M& A, V& B, V& weights) +{ + // Default rcond and algorithm + double rcond = -1.0; + Algorithm algorithm = Algorithm::GELSD; + solve(A, B, weights, rcond, algorithm); +} + +template <typename M, typename V> +void OLS::solve_with_workspace(M& A, V& B, V& weights, Workspace& ws) +{ + // Default rcond and algorithm + double rcond = -1.0; + Algorithm algorithm = Algorithm::GELSD; + solve_with_workspace(A, B, weights, ws, rcond, algorithm); +} #endif // OLS_H diff --git a/src/ols.cpp b/src/ols.cpp new file mode 100644 index 0000000..a465a5b --- /dev/null +++ b/src/ols.cpp @@ -0,0 +1,211 @@ +#include <tadah/models/ols.h> +#include <cmath> +#include <stdexcept> +#include <cstdlib> // For malloc and free + +// Ensure that LAPACK functions are declared in the global namespace +// If not, include the necessary headers or declarations + +OLS::Workspace::Workspace() + : s(nullptr), iwork(nullptr), work(nullptr), lwork(0), m(0), n(0), algo(Algorithm::GELSD) +{ +} + +OLS::Workspace::~Workspace() +{ + release(); +} + +void OLS::Workspace::allocate(int m_, int n_, Algorithm algorithm) +{ + // Check for valid algorithm + if (algorithm != Algorithm::GELSD && algorithm != Algorithm::GELS) { + throw std::invalid_argument("Invalid algorithm specified in Workspace::allocate()."); + } + + // Release existing memory if allocated + release(); + + m = m_; + n = n_; + algo = algorithm; + + if (algorithm == Algorithm::GELSD) { + int minmn = std::min(m, n); + + // Allocate s for singular values + s = reinterpret_cast<double*>(malloc(minmn * sizeof(double))); + if (!s) throw std::bad_alloc(); + + // Compute nlvl and allocate iwork + int smlsiz = 25; + int nlvl = std::max(0, int(std::log2(static_cast<double>(minmn) / (smlsiz + 1))) + 1); + int iwork_size = 3 * minmn * nlvl + 11 * minmn; + iwork = reinterpret_cast<int*>(malloc(iwork_size * sizeof(int))); + if (!iwork) { + release(); + throw std::bad_alloc(); + } + } else { + // For DGELS + s = nullptr; + iwork = nullptr; + } + + // Query for optimal work array size + double wkopt; + int lwork_query = -1; + int nrhs = 1; + int lda = m; + int ldb = std::max(m, n); + + // Allocate minimal dummy arrays for A and B + double* adummy = reinterpret_cast<double*>(malloc(m * n * sizeof(double))); + double* bdummy = reinterpret_cast<double*>(malloc(ldb * nrhs * sizeof(double))); + if (!adummy || !bdummy) { + if (adummy) ::free(adummy); + if (bdummy) ::free(bdummy); + release(); + throw std::bad_alloc(); + } + + int info; + + if (algorithm == Algorithm::GELSD) { + int rank; + double rcond_query = -1; // Use default rcond + + // Set up dummy LAPACK call + ::dgelsd_(&m, &n, &nrhs, adummy, &lda, bdummy, &ldb, s, &rcond_query, &rank, + &wkopt, &lwork_query, iwork, &info); + } else { // Algorithm::GELS + char trans = 'N'; + ::dgels_(&trans, &m, &n, &nrhs, adummy, &lda, bdummy, &ldb, &wkopt, &lwork_query, &info); + } + + // Free dummy arrays + ::free(adummy); + ::free(bdummy); + + if (info != 0) { + release(); + throw std::runtime_error("Error in workspace query: info = " + std::to_string(info)); + } + + lwork = static_cast<int>(wkopt); + work = reinterpret_cast<double*>(malloc(lwork * sizeof(double))); + if (!work) { + release(); + throw std::bad_alloc(); + } +} + +void OLS::Workspace::release() +{ + if (s) ::free(s); + if (iwork) ::free(iwork); + if (work) ::free(work); + s = nullptr; + iwork = nullptr; + work = nullptr; + lwork = 0; + m = 0; + n = 0; + algo = Algorithm::GELSD; +} + +bool OLS::Workspace::is_sufficient(int m_, int n_, Algorithm algorithm) const +{ + // Check for valid algorithm + if (algorithm != Algorithm::GELSD && algorithm != Algorithm::GELS) { + throw std::invalid_argument("Invalid algorithm specified in Workspace::is_sufficient()."); + } + + // Check if current workspace dimensions and algorithm are sufficient + if (algo != algorithm || m < m_ || n < n_) { + return false; + } + + int maxmn = std::max(m_, n_); + + if (algorithm == Algorithm::GELSD) { + // Check if s is allocated + if (!s) return false; + + // For work size, check if lwork is sufficient + // We can perform a workspace query to find the required lwork + double wkopt; + int lwork_query = -1; + int nrhs = 1; + int lda = m_; + int ldb = maxmn; + + // Allocate minimal dummy arrays for A and B + double* adummy = reinterpret_cast<double*>(malloc(m_ * n_ * sizeof(double))); + double* bdummy = reinterpret_cast<double*>(malloc(ldb * nrhs * sizeof(double))); + if (!adummy || !bdummy) { + if (adummy) ::free(adummy); + if (bdummy) ::free(bdummy); + throw std::bad_alloc(); + } + int rank; + int info; + + // Query optimal lwork + double rcond_query = -1; // Use default rcond + ::dgelsd_(&m_, &n_, &nrhs, adummy, &lda, bdummy, &ldb, s, &rcond_query, &rank, + &wkopt, &lwork_query, iwork, &info); + + // Free dummy arrays + ::free(adummy); + ::free(bdummy); + + if (info != 0) { + throw std::runtime_error("Error in workspace sufficiency check: info = " + std::to_string(info)); + } + + int required_lwork = static_cast<int>(wkopt); + + if (lwork < required_lwork) { + return false; + } + + } else { // Algorithm::GELS + // For DGELS + // Query optimal lwork + double wkopt; + int lwork_query = -1; + int nrhs = 1; + int lda = m_; + int ldb = maxmn; + int info; + char trans = 'N'; + + // Allocate minimal dummy arrays for A and B + double* adummy = reinterpret_cast<double*>(malloc(m_ * n_ * sizeof(double))); + double* bdummy = reinterpret_cast<double*>(malloc(ldb * nrhs * sizeof(double))); + if (!adummy || !bdummy) { + if (adummy) ::free(adummy); + if (bdummy) ::free(bdummy); + throw std::bad_alloc(); + } + + ::dgels_(&trans, &m_, &n_, &nrhs, adummy, &lda, bdummy, &ldb, &wkopt, &lwork_query, &info); + + // Free dummy arrays + ::free(adummy); + ::free(bdummy); + + if (info != 0) { + throw std::runtime_error("Error in workspace sufficiency check: info = " + std::to_string(info)); + } + + int required_lwork = static_cast<int>(wkopt); + if (lwork < required_lwork) { + return false; + } + } + + // All checks passed + return true; +} diff --git a/tests/test_ols.cpp b/tests/test_ols.cpp index f84c6b0..8275369 100644 --- a/tests/test_ols.cpp +++ b/tests/test_ols.cpp @@ -1,22 +1,132 @@ #include "catch2/catch.hpp" +#include <tadah/core/lapack.h> #include <tadah/core/maths.h> #include <tadah/models/ols.h> -#include <tadah/models/ridge_regression.h> #include <iomanip> TEST_CASE("Testing OLS") { - Matrix Phi1(3, 3, {1, 4, 7, 2, 5, 8, 3, 6, 9}); - Matrix Phi2(3, 3, {1, 4, 7, 2, 5, 8, 3, 6, 9}); - aed_type b1(3); - b1[0]=1; - b1[1]=1; - b1[2]=1; - aed_type w(3); - aed_type b2=b1; - - OLS::solve(Phi1,b1,w, 1e-8); - aed_type p= Phi2*w; - REQUIRE_THAT(p[0], Catch::Matchers::WithinRel(b2[0])); - REQUIRE_THAT(p[1], Catch::Matchers::WithinRel(b2[1])); - REQUIRE_THAT(p[2], Catch::Matchers::WithinRel(b2[2])); + using Matrix = Matrix_T<>; // Replace with your actual matrix type. + using Vector = aed_type; // Replace with your actual vector type. + + // Helper function to compare vectors + auto vectors_are_close = [](const Vector& v1, const Vector& v2, double tolerance) { + REQUIRE(v1.size() == v2.size()); + for (size_t i = 0; i < v1.size(); ++i) { + REQUIRE_THAT(v1[i], Catch::Matchers::WithinAbs(v2[i], tolerance)); + } + }; + + // Prepare test data with corrected matrix initialization + Matrix A(3, 3, {1, 4, 7, // First column + 2, 5, 8, // Second column + 3, 6, 10}); // Third column + + Vector expected_weights = {1, 2, 3}; + + // Recompute B = A * expected_weights + Vector B = A * expected_weights; + + SECTION("Solve using default algorithm (GELSD) without workspace") { + Vector weights(3); + OLS::solve(A, B, weights); + vectors_are_close(weights, expected_weights, 1e-4); + } + + // Other sections remain unchanged... + + SECTION("Test handling of rank-deficient matrix") { + // Corrected initialization in column-major order + Matrix A_singular(3, 3, { + 1, 4, 7, // First column + 2, 5, 8, // Second column + 3, 6, 9 // Third column + }); + + // Make second column a multiple of the first: second column = 2 * first column + for (int i = 0; i < 3; ++i) { + A_singular(i,1) = 2 * A_singular(i,0); + } + + // Recompute B_singular to match the modified A_singular and expected weights + Vector expected_weights_singular = {1, 0, 3}; // Since second column is dependent, set weight[1] = 0 + Vector B_singular = A_singular * expected_weights_singular; + + Matrix A_singular2 = A_singular; + Vector B_singular2 = B_singular; + + Vector weights(3); + + // Solve using GELSD, which can handle rank deficiency + OLS::solve(A_singular, B_singular, weights); + + // Since the system is rank-deficient, the solution is not unique + // We can check that A_singular * weights is close to B_singular + Vector B_computed = A_singular2 * weights; + + // Increase tolerance due to potential numerical inaccuracies + vectors_are_close(B_computed, B_singular2, 1e-4); + } + + SECTION("Test solve with overdetermined system") { + // Corrected matrix initialization in column-major order + int m = 5; + int n = 3; + Matrix A_over(m, n, { + 1, 4, 7, 1, 0, // First column + 2, 5, 8, 0, 1, // Second column + 3, 6, 10,0, 0 // Third column + }); + Matrix A_over2 = A_over; + + Vector expected_weights_over = {1, 2, 3}; + + // Recompute B_over + Vector B_over = A_over * expected_weights_over; + Vector B_over2 = B_over;; + + Vector weights(n); + + OLS::solve(A_over, B_over, weights); + + // Compute B_computed = A_over * weights + Vector B_computed = A_over2 * weights; + + // Check that the residual is small + for (int i = 0; i < m; ++i) { + REQUIRE(std::abs(B_over2[i] - B_computed[i]) < 1e-3); // Adjusted tolerance + } + } + + SECTION("Test solve with underdetermined system") { + // Corrected matrix initialization in column-major order + int m = 3; + int n = 5; + Matrix A_under(m, n, { + 1, 6, 11, // First column + 2, 7, 12, // Second column + 3, 8, 13, // Third column + 4, 9, 14, // Fourth column + 5,10,15 // Fifth column + }); + + Vector B_under = {1, 2, 3}; + // Enlarge B_under to size max(m, n) by appending zeros + B_under.resize(std::max(m, n)); + + Vector weights(n); + + OLS::solve(A_under, B_under, weights); + + // Compute B_computed = A_under * weights + Vector B_computed = A_under * weights; + + // Since B_under has been resized, compare only the first m elements + Vector B_under_trimmed = Vector(B_under.ptr(),m); + Vector B_computed_trimmed = Vector(B_computed.ptr(),m); + + vectors_are_close(B_computed_trimmed, B_under_trimmed, 1e-4); + } + + // Rest of the tests... } + -- GitLab