From e449d5672b452f406662e82f2ac9d8b822bbbaf4 Mon Sep 17 00:00:00 2001
From: mkirsz <s1351949@sms.ed.ac.uk>
Date: Sun, 16 Feb 2025 17:11:15 +0000
Subject: [PATCH] Improved OLS

---
 include/tadah/models/linear_regressor.h |   2 +-
 include/tadah/models/ols.h              | 273 +++++++++++++++++++-----
 src/ols.cpp                             | 211 ++++++++++++++++++
 tests/test_ols.cpp                      | 140 ++++++++++--
 4 files changed, 558 insertions(+), 68 deletions(-)
 create mode 100644 src/ols.cpp

diff --git a/include/tadah/models/linear_regressor.h b/include/tadah/models/linear_regressor.h
index ce111bb..591bec1 100644
--- a/include/tadah/models/linear_regressor.h
+++ b/include/tadah/models/linear_regressor.h
@@ -38,7 +38,7 @@ class LinearRegressor {
       double rcond = config.size("LAMBDA")==2 ? config.get<double>("LAMBDA",1) : 1e-8;
 
       if (lambda == 0) {
-        OLS::solve(Phi, T, weights, rcond);
+        OLS::solve(Phi, T, weights, rcond, OLS::Algorithm::GELSD);
       } else {
         double alpha = config.get<double>("ALPHA");
         double beta = config.get<double>("BETA");
diff --git a/include/tadah/models/ols.h b/include/tadah/models/ols.h
index b95c774..ca94d21 100644
--- a/include/tadah/models/ols.h
+++ b/include/tadah/models/ols.h
@@ -1,74 +1,243 @@
 #ifndef OLS_H
 #define OLS_H
 
+#include <tadah/core/lapack.h> // Ensure this header includes the necessary LAPACK function declarations
 #include <tadah/core/core_types.h>
-#include <tadah/core/lapack.h>
+#include <cmath>
+#include <stdexcept>
+#include <cstdlib> // For malloc and free
 
 /**
  * @class OLS
  * @brief Provides functionality for solving Ordinary Least Squares (OLS) problems.
  *
- * Utilizes LAPACK's DGELSD function to solve linear systems where the goal is to minimize 
- * the Euclidean norm of residuals.
+ * Utilizes LAPACK's DGELS and DGELSD functions to solve linear systems where the goal is to minimize 
+ * the Euclidean norm of residuals. By default, it uses DGELSD, but users can select DGELS if desired.
+ *
+ * **Algorithm Selection:**
+ * - **GELSD (`DGELSD`):** Uses a singular value decomposition (SVD) approach, which is more robust and
+ *   can handle rank-deficient or ill-conditioned systems. It can provide a minimum-norm solution and
+ *   handle cases where the matrix `A` does not have full rank. `DGELSD` is recommended when precision
+ *   is important, or when the system may be rank-deficient or ill-conditioned.
+ * - **GELS (`DGELS`):** Uses QR or LQ factorization, which is generally faster but less robust for
+ *   ill-conditioned problems. It assumes that `A` has full rank. `DGELS` is suitable for well-conditioned
+ *   systems where performance is critical and the matrix `A` is expected to have full rank.
+ *
+ * **Usage Recommendation:**
+ * - Use **GELSD** (`Algorithm::GELSD`) when dealing with potentially rank-deficient or ill-conditioned systems,
+ *   or when you require the most accurate solution at the expense of computational time.
+ * - Use **GELS** (`Algorithm::GELS`) when working with well-conditioned systems where speed is more important
+ *   than handling rank deficiency.
+ *
+ * **Workspace Management:**
+ * - The class provides methods to solve the OLS problem with and without providing a preallocated workspace.
+ *   Preallocating the workspace can improve performance when solving multiple problems of the same size.
+ * - If the workspace is insufficient, the `solve_with_workspace` method will automatically reallocate it to
+ *   the required size.
+ *
+ * **Example:**
+ * ```cpp
+ * Matrix A; // Initialize with your data
+ * Vector B; // Initialize with your data
+ * Vector weights(A.cols());
+ *
+ * // Solve using default algorithm (GELSD)
+ * OLS::solve(A, B, weights);
+ *
+ * // Solve using GELS algorithm
+ * OLS::solve(A, B, weights, -1.0, OLS::Algorithm::GELS);
+ * ```
  */
 class OLS {
-  public:
+public:
+    /**
+     * @brief Enum to select the algorithm used for solving the OLS problem.
+     */
+    enum class Algorithm {
+        GELS,   // Uses DGELS
+        GELSD   // Uses DGELSD
+    };
+
+    /**
+     * @brief Workspace struct to hold pre-allocated memory.
+     *
+     * This struct contains the necessary buffers for the LAPACK functions.
+     * Users can create an instance of `Workspace` and pass it to the `solve_with_workspace` method.
+     */
+    struct Workspace {
+        double* s;    // Singular values array (used by DGELSD)
+        int* iwork;   // Integer workspace array (used by DGELSD)
+        double* work; // Double workspace array
+        int lwork;    // Size of the work array
+        int m;        // Number of rows in A (for which workspace was allocated)
+        int n;        // Number of columns in A (for which workspace was allocated)
+        Algorithm algo; // Algorithm for which workspace was allocated
+
+        Workspace();
+        ~Workspace();
+
+        /**
+         * @brief Allocates memory for the workspace based on problem dimensions and algorithm.
+         * 
+         * @param m_ Number of rows in matrix A.
+         * @param n_ Number of columns in matrix A.
+         * @param algorithm The algorithm for which to allocate workspace.
+         */
+        void allocate(int m_, int n_, Algorithm algorithm = Algorithm::GELSD);
+
+        /**
+         * @brief Frees the allocated memory.
+         */
+        void release();
+
+        /**
+         * @brief Checks if the workspace is sufficient for the given dimensions and algorithm.
+         * 
+         * @param m_ Number of rows required.
+         * @param n_ Number of columns required.
+         * @param algorithm The algorithm required.
+         * @return True if workspace is sufficient, false otherwise.
+         */
+        bool is_sufficient(int m_, int n_, Algorithm algorithm = Algorithm::GELSD) const;
+    };
+
     /**
-     * @brief Solves the OLS problem for the given matrix and vector.
+     * @brief Solves the OLS problem for the given matrix and vector without requiring external workspace.
      *
-     * This function computes the weights that minimize the difference between 
-     * the target vector and the matrix product.
+     * @tparam M Type for the matrix A.
+     * @tparam V Type for the vectors B and weights.
+     * @param A Input matrix (may be modified during computation).
+     * @param B Input target vector; contains the solution upon return.
+     * @param weights Output vector containing the computed weights.
+     * @param rcond The reciprocal of the condition number threshold (used for DGELSD). Default is -1.0.
+     * @param algorithm Algorithm to use (GELSD by default).
+     */
+    template <typename M, typename V>
+    static void solve(M& A, V& B, V& weights, double rcond, Algorithm algorithm);
+
+    /**
+     * @brief Solves the OLS problem for the given matrix and vector using provided workspace.
+     *
+     * @tparam M Type for the matrix A.
+     * @tparam V Type for the vectors B and weights.
+     * @param A Input matrix (may be modified during computation).
+     * @param B Input target vector; contains the solution upon return.
+     * @param weights Output vector containing the computed weights.
+     * @param ws Reference to the Workspace instance.
+     * @param rcond The reciprocal of the condition number threshold (used for DGELSD). Default is -1.0.
+     * @param algorithm Algorithm to use (GELSD by default).
+     */
+    template <typename M, typename V>
+    static void solve_with_workspace(M& A, V& B, V& weights, Workspace& ws, double rcond, Algorithm algorithm);
+
+    /**
+     * @brief Solves the OLS problem using default settings (GELSD algorithm and default rcond).
      *
      * @tparam M Type for the matrix A.
-     * @tparam V Type for the vector B.
-     * @param A Input matrix, which will be destroyed upon exit.
-     * @param B Input target vector, resized if smaller than matrix columns; contains the solution upon return.
+     * @tparam V Type for the vectors B and weights.
+     * @param A Input matrix (may be modified during computation).
+     * @param B Input target vector; contains the solution upon return.
      * @param weights Output vector containing the computed weights.
      */
     template <typename M, typename V>
-    static void solve(M &A, V &B, V &weights, double rcond) {
-        // Resize B if necessary to match A's column count.
-        if (B.size() < A.cols())
-            B.resize(A.cols());
-
-        // Variables for LAPACK computation
-        int m = A.rows();
-        int n = A.cols();
-        int nrhs = 1; // Number of target columns
-        double *a = A.ptr();
-        int lda = m;
-        double *b = B.ptr();
-        int ldb = std::max(m, n);
-        double *s = new double[std::min(m, n)]; // Singular values
+    static void solve(M& A, V& B, V& weights);
+
+    /**
+     * @brief Solves the OLS problem using default settings and provided workspace.
+     *
+     * @tparam M Type for the matrix A.
+     * @tparam V Type for the vectors B and weights.
+     * @param A Input matrix (may be modified during computation).
+     * @param B Input target vector; contains the solution upon return.
+     * @param weights Output vector containing the computed weights.
+     * @param ws Reference to the Workspace instance.
+     */
+    template <typename M, typename V>
+    static void solve_with_workspace(M& A, V& B, V& weights, Workspace& ws);
+};
+
+// Implement templated methods here
+
+template <typename M, typename V>
+void OLS::solve(M& A, V& B, V& weights, double rcond, Algorithm algorithm)
+{
+    Workspace ws;
+    solve_with_workspace(A, B, weights, ws, rcond, algorithm);
+}
+
+template <typename M, typename V>
+void OLS::solve_with_workspace(M& A, V& B, V& weights, Workspace& ws, double rcond, Algorithm algorithm)
+{
+    int m = A.rows();
+    int n = A.cols();
+
+    // Check for valid algorithm
+    if (algorithm != Algorithm::GELSD && algorithm != Algorithm::GELS) {
+        throw std::invalid_argument("Invalid algorithm specified.");
+    }
+
+    // Check that dimensions of weights match expected size
+    if (weights.size() != static_cast<std::size_t>(n)) {
+        throw std::invalid_argument("weights size is incorrect.");
+    }
+
+    // Check that B has sufficient size
+    int maxmn = std::max(m, n);
+    if (B.size() < static_cast<std::size_t>(maxmn)) {
+        throw std::invalid_argument("B size is insufficient.");
+    }
+
+    // Check if workspace is sufficient; if not, reallocate
+    if (!ws.is_sufficient(m, n, algorithm)) {
+        ws.allocate(m, n, algorithm);
+    }
+
+    int nrhs = 1;
+    double* a = A.ptr();
+    int lda = m;
+    double* b = B.ptr();
+    int ldb = maxmn;
+    int info;
+
+    if (algorithm == Algorithm::GELSD) {
         int rank;
-        double *work;
-        int lwork = -1; // Workspace query
-        int smlsiz = 25;
-        int nlvl = std::max(0, int(std::log2(std::min(m, n) / (smlsiz + 1))) + 1);
-        int *iwork = new int[3 * std::min(m, n) * nlvl + 11 * std::min(m, n)];
-        int info;
-        double wkopt;
-
-        // Query optimal workspace size
-        dgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, s, &rcond, &rank,
-                &wkopt, &lwork, iwork, &info);
-        
-        // Allocate workspace memory
-        lwork = (int)wkopt;
-        work = new double[lwork];
-
-        // Solve the OLS problem
-        dgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, s, &rcond, &rank,
-                work, &lwork, iwork, &info);
-
-        // Copy solution to weights
-        weights = B.copy(A.cols());
-
-        // Cleanup memory
-        delete[] s;
-        delete[] work;
-        delete[] iwork;
+
+        // Solve using DGELSD
+        ::dgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, ws.s, &rcond, &rank,
+                ws.work, &ws.lwork, ws.iwork, &info);
+    } else { // Algorithm::GELS
+        // Solve using DGELS
+        char trans = 'N';
+        ::dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb,
+               ws.work, &ws.lwork, &info);
     }
-};
+
+    if (info != 0) {
+        throw std::runtime_error("Error in LAPACK function: info = " + std::to_string(info));
+    }
+
+    // Copy solution to weights
+    for (int i = 0; i < n; ++i) {
+        weights[i] = b[i];
+    }
+}
+
+template <typename M, typename V>
+void OLS::solve(M& A, V& B, V& weights)
+{
+    // Default rcond and algorithm
+    double rcond = -1.0;
+    Algorithm algorithm = Algorithm::GELSD;
+    solve(A, B, weights, rcond, algorithm);
+}
+
+template <typename M, typename V>
+void OLS::solve_with_workspace(M& A, V& B, V& weights, Workspace& ws)
+{
+    // Default rcond and algorithm
+    double rcond = -1.0;
+    Algorithm algorithm = Algorithm::GELSD;
+    solve_with_workspace(A, B, weights, ws, rcond, algorithm);
+}
 
 #endif // OLS_H
diff --git a/src/ols.cpp b/src/ols.cpp
new file mode 100644
index 0000000..a465a5b
--- /dev/null
+++ b/src/ols.cpp
@@ -0,0 +1,211 @@
+#include <tadah/models/ols.h>
+#include <cmath>
+#include <stdexcept>
+#include <cstdlib> // For malloc and free
+
+// Ensure that LAPACK functions are declared in the global namespace
+// If not, include the necessary headers or declarations
+
+OLS::Workspace::Workspace()
+    : s(nullptr), iwork(nullptr), work(nullptr), lwork(0), m(0), n(0), algo(Algorithm::GELSD)
+{
+}
+
+OLS::Workspace::~Workspace()
+{
+    release();
+}
+
+void OLS::Workspace::allocate(int m_, int n_, Algorithm algorithm)
+{
+    // Check for valid algorithm
+    if (algorithm != Algorithm::GELSD && algorithm != Algorithm::GELS) {
+        throw std::invalid_argument("Invalid algorithm specified in Workspace::allocate().");
+    }
+
+    // Release existing memory if allocated
+    release();
+
+    m = m_;
+    n = n_;
+    algo = algorithm;
+
+    if (algorithm == Algorithm::GELSD) {
+        int minmn = std::min(m, n);
+
+        // Allocate s for singular values
+        s = reinterpret_cast<double*>(malloc(minmn * sizeof(double)));
+        if (!s) throw std::bad_alloc();
+
+        // Compute nlvl and allocate iwork
+        int smlsiz = 25;
+        int nlvl = std::max(0, int(std::log2(static_cast<double>(minmn) / (smlsiz + 1))) + 1);
+        int iwork_size = 3 * minmn * nlvl + 11 * minmn;
+        iwork = reinterpret_cast<int*>(malloc(iwork_size * sizeof(int)));
+        if (!iwork) {
+            release();
+            throw std::bad_alloc();
+        }
+    } else {
+        // For DGELS
+        s = nullptr;
+        iwork = nullptr;
+    }
+
+    // Query for optimal work array size
+    double wkopt;
+    int lwork_query = -1;
+    int nrhs = 1;
+    int lda = m;
+    int ldb = std::max(m, n);
+
+    // Allocate minimal dummy arrays for A and B
+    double* adummy = reinterpret_cast<double*>(malloc(m * n * sizeof(double)));
+    double* bdummy = reinterpret_cast<double*>(malloc(ldb * nrhs * sizeof(double)));
+    if (!adummy || !bdummy) {
+        if (adummy) ::free(adummy);
+        if (bdummy) ::free(bdummy);
+        release();
+        throw std::bad_alloc();
+    }
+
+    int info;
+
+    if (algorithm == Algorithm::GELSD) {
+        int rank;
+        double rcond_query = -1;  // Use default rcond
+
+        // Set up dummy LAPACK call
+        ::dgelsd_(&m, &n, &nrhs, adummy, &lda, bdummy, &ldb, s, &rcond_query, &rank,
+                &wkopt, &lwork_query, iwork, &info);
+    } else { // Algorithm::GELS
+        char trans = 'N';
+        ::dgels_(&trans, &m, &n, &nrhs, adummy, &lda, bdummy, &ldb, &wkopt, &lwork_query, &info);
+    }
+
+    // Free dummy arrays
+    ::free(adummy);
+    ::free(bdummy);
+
+    if (info != 0) {
+        release();
+        throw std::runtime_error("Error in workspace query: info = " + std::to_string(info));
+    }
+
+    lwork = static_cast<int>(wkopt);
+    work = reinterpret_cast<double*>(malloc(lwork * sizeof(double)));
+    if (!work) {
+        release();
+        throw std::bad_alloc();
+    }
+}
+
+void OLS::Workspace::release()
+{
+    if (s) ::free(s);
+    if (iwork) ::free(iwork);
+    if (work) ::free(work);
+    s = nullptr;
+    iwork = nullptr;
+    work = nullptr;
+    lwork = 0;
+    m = 0;
+    n = 0;
+    algo = Algorithm::GELSD;
+}
+
+bool OLS::Workspace::is_sufficient(int m_, int n_, Algorithm algorithm) const
+{
+    // Check for valid algorithm
+    if (algorithm != Algorithm::GELSD && algorithm != Algorithm::GELS) {
+        throw std::invalid_argument("Invalid algorithm specified in Workspace::is_sufficient().");
+    }
+
+    // Check if current workspace dimensions and algorithm are sufficient
+    if (algo != algorithm || m < m_ || n < n_) {
+        return false;
+    }
+
+    int maxmn = std::max(m_, n_);
+
+    if (algorithm == Algorithm::GELSD) {
+        // Check if s is allocated
+        if (!s) return false;
+
+        // For work size, check if lwork is sufficient
+        // We can perform a workspace query to find the required lwork
+        double wkopt;
+        int lwork_query = -1;
+        int nrhs = 1;
+        int lda = m_;
+        int ldb = maxmn;
+
+        // Allocate minimal dummy arrays for A and B
+        double* adummy = reinterpret_cast<double*>(malloc(m_ * n_ * sizeof(double)));
+        double* bdummy = reinterpret_cast<double*>(malloc(ldb * nrhs * sizeof(double)));
+        if (!adummy || !bdummy) {
+            if (adummy) ::free(adummy);
+            if (bdummy) ::free(bdummy);
+            throw std::bad_alloc();
+        }
+        int rank;
+        int info;
+
+        // Query optimal lwork
+        double rcond_query = -1;  // Use default rcond
+        ::dgelsd_(&m_, &n_, &nrhs, adummy, &lda, bdummy, &ldb, s, &rcond_query, &rank,
+                &wkopt, &lwork_query, iwork, &info);
+
+        // Free dummy arrays
+        ::free(adummy);
+        ::free(bdummy);
+
+        if (info != 0) {
+            throw std::runtime_error("Error in workspace sufficiency check: info = " + std::to_string(info));
+        }
+
+        int required_lwork = static_cast<int>(wkopt);
+
+        if (lwork < required_lwork) {
+            return false;
+        }
+
+    } else { // Algorithm::GELS
+        // For DGELS
+        // Query optimal lwork
+        double wkopt;
+        int lwork_query = -1;
+        int nrhs = 1;
+        int lda = m_;
+        int ldb = maxmn;
+        int info;
+        char trans = 'N';
+
+        // Allocate minimal dummy arrays for A and B
+        double* adummy = reinterpret_cast<double*>(malloc(m_ * n_ * sizeof(double)));
+        double* bdummy = reinterpret_cast<double*>(malloc(ldb * nrhs * sizeof(double)));
+        if (!adummy || !bdummy) {
+            if (adummy) ::free(adummy);
+            if (bdummy) ::free(bdummy);
+            throw std::bad_alloc();
+        }
+
+        ::dgels_(&trans, &m_, &n_, &nrhs, adummy, &lda, bdummy, &ldb, &wkopt, &lwork_query, &info);
+
+        // Free dummy arrays
+        ::free(adummy);
+        ::free(bdummy);
+
+        if (info != 0) {
+            throw std::runtime_error("Error in workspace sufficiency check: info = " + std::to_string(info));
+        }
+
+        int required_lwork = static_cast<int>(wkopt);
+        if (lwork < required_lwork) {
+            return false;
+        }
+    }
+
+    // All checks passed
+    return true;
+}
diff --git a/tests/test_ols.cpp b/tests/test_ols.cpp
index f84c6b0..8275369 100644
--- a/tests/test_ols.cpp
+++ b/tests/test_ols.cpp
@@ -1,22 +1,132 @@
 #include "catch2/catch.hpp"
+#include <tadah/core/lapack.h>
 #include <tadah/core/maths.h>
 #include <tadah/models/ols.h>
-#include <tadah/models/ridge_regression.h>
 #include <iomanip>
 
 TEST_CASE("Testing OLS") {
-  Matrix Phi1(3, 3, {1, 4, 7, 2, 5, 8, 3, 6, 9});
-  Matrix Phi2(3, 3, {1, 4, 7, 2, 5, 8, 3, 6, 9});
-  aed_type b1(3);
-  b1[0]=1;
-  b1[1]=1;
-  b1[2]=1;
-  aed_type w(3);
-  aed_type b2=b1;
-
-  OLS::solve(Phi1,b1,w, 1e-8);
-  aed_type p= Phi2*w;
-  REQUIRE_THAT(p[0], Catch::Matchers::WithinRel(b2[0]));
-  REQUIRE_THAT(p[1], Catch::Matchers::WithinRel(b2[1]));
-  REQUIRE_THAT(p[2], Catch::Matchers::WithinRel(b2[2]));
+  using Matrix = Matrix_T<>; // Replace with your actual matrix type.
+  using Vector = aed_type;   // Replace with your actual vector type.
+
+  // Helper function to compare vectors
+  auto vectors_are_close = [](const Vector& v1, const Vector& v2, double tolerance) {
+    REQUIRE(v1.size() == v2.size());
+    for (size_t i = 0; i < v1.size(); ++i) {
+      REQUIRE_THAT(v1[i], Catch::Matchers::WithinAbs(v2[i], tolerance));
+    }
+  };
+
+  // Prepare test data with corrected matrix initialization
+  Matrix A(3, 3, {1, 4, 7,   // First column
+    2, 5, 8,   // Second column
+    3, 6, 10}); // Third column
+
+  Vector expected_weights = {1, 2, 3};
+
+  // Recompute B = A * expected_weights
+  Vector B = A * expected_weights;
+
+  SECTION("Solve using default algorithm (GELSD) without workspace") {
+    Vector weights(3);
+    OLS::solve(A, B, weights);
+    vectors_are_close(weights, expected_weights, 1e-4);
+  }
+
+  // Other sections remain unchanged...
+
+  SECTION("Test handling of rank-deficient matrix") {
+    // Corrected initialization in column-major order
+    Matrix A_singular(3, 3, {
+      1, 4, 7,  // First column
+      2, 5, 8,  // Second column
+      3, 6, 9   // Third column
+    });
+
+    // Make second column a multiple of the first: second column = 2 * first column
+    for (int i = 0; i < 3; ++i) {
+      A_singular(i,1) = 2 * A_singular(i,0);
+    }
+
+    // Recompute B_singular to match the modified A_singular and expected weights
+    Vector expected_weights_singular = {1, 0, 3}; // Since second column is dependent, set weight[1] = 0
+    Vector B_singular = A_singular * expected_weights_singular;
+
+    Matrix A_singular2 = A_singular;
+    Vector B_singular2 = B_singular;
+
+    Vector weights(3);
+
+    // Solve using GELSD, which can handle rank deficiency
+    OLS::solve(A_singular, B_singular, weights);
+
+    // Since the system is rank-deficient, the solution is not unique
+    // We can check that A_singular * weights is close to B_singular
+    Vector B_computed = A_singular2 * weights;
+
+    // Increase tolerance due to potential numerical inaccuracies
+    vectors_are_close(B_computed, B_singular2, 1e-4);
+  }
+
+  SECTION("Test solve with overdetermined system") {
+    // Corrected matrix initialization in column-major order
+    int m = 5;
+    int n = 3;
+    Matrix A_over(m, n, {
+      1, 4, 7, 1, 0, // First column
+      2, 5, 8, 0, 1, // Second column
+      3, 6, 10,0, 0  // Third column
+    });
+    Matrix A_over2 = A_over;
+
+    Vector expected_weights_over = {1, 2, 3};
+
+    // Recompute B_over
+    Vector B_over = A_over * expected_weights_over;
+    Vector B_over2 = B_over;;
+
+    Vector weights(n);
+
+    OLS::solve(A_over, B_over, weights);
+
+    // Compute B_computed = A_over * weights
+    Vector B_computed = A_over2 * weights;
+
+    // Check that the residual is small
+    for (int i = 0; i < m; ++i) {
+      REQUIRE(std::abs(B_over2[i] - B_computed[i]) < 1e-3); // Adjusted tolerance
+    }
+  }
+
+  SECTION("Test solve with underdetermined system") {
+    // Corrected matrix initialization in column-major order
+    int m = 3;
+    int n = 5;
+    Matrix A_under(m, n, {
+      1, 6, 11, // First column
+      2, 7, 12, // Second column
+      3, 8, 13, // Third column
+      4, 9, 14, // Fourth column
+      5,10,15  // Fifth column
+    });
+
+    Vector B_under = {1, 2, 3};
+    // Enlarge B_under to size max(m, n) by appending zeros
+    B_under.resize(std::max(m, n));
+
+    Vector weights(n);
+
+    OLS::solve(A_under, B_under, weights);
+
+    // Compute B_computed = A_under * weights
+    Vector B_computed = A_under * weights;
+
+    // Since B_under has been resized, compare only the first m elements
+    Vector B_under_trimmed = Vector(B_under.ptr(),m);
+    Vector B_computed_trimmed = Vector(B_computed.ptr(),m);
+
+    vectors_are_close(B_computed_trimmed, B_under_trimmed, 1e-4);
+  }
+
+  // Rest of the tests...
 }
+
-- 
GitLab